Reapply r372285 "GlobalISel: Don't materialize immarg arguments to intrinsics"

This reverts r372314, reapplying r372285 and the commits which depend
on it (r372286-r372293, and r372296-r372297)

This was missing one switch to getTargetConstant in an untested case.

llvm-svn: 372338
This commit is contained in:
Matt Arsenault 2019-09-19 16:26:14 +00:00
parent e0900f285b
commit 3ecab8e455
79 changed files with 5010 additions and 1365 deletions

View File

@ -220,6 +220,11 @@ enum {
/// - OpIdx - Operand index
GIM_CheckIsMBB,
/// Check the specified operand is an Imm
/// - InsnID - Instruction ID
/// - OpIdx - Operand index
GIM_CheckIsImm,
/// Check if the specified operand is safe to fold into the current
/// instruction.
/// - InsnID - Instruction ID

View File

@ -690,7 +690,19 @@ bool InstructionSelector::executeMatchTable(
}
break;
}
case GIM_CheckIsImm: {
int64_t InsnID = MatchTable[CurrentIdx++];
int64_t OpIdx = MatchTable[CurrentIdx++];
DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
dbgs() << CurrentIdx << ": GIM_CheckIsImm(MIs[" << InsnID
<< "]->getOperand(" << OpIdx << "))\n");
assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
if (!State.MIs[InsnID]->getOperand(OpIdx).isImm()) {
if (handleReject() == RejectAndGiveUp)
return false;
}
break;
}
case GIM_CheckIsSafeToFold: {
int64_t InsnID = MatchTable[CurrentIdx++];
DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),

View File

@ -374,6 +374,9 @@ namespace llvm {
/// Returns a mask for which lanes get read/written by the given (register)
/// machine operand.
LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const;
/// Returns true if the def register in \p MO has no uses.
bool deadDefHasNoUse(const MachineOperand &MO);
};
/// Creates a new SUnit and return a ptr to it.

View File

@ -848,6 +848,11 @@ class ImmLeaf<ValueType vt, code pred, SDNodeXForm xform = NOOP_SDNodeXForm,
bit IsAPFloat = 0;
}
// Convenience wrapper for ImmLeaf to use timm/TargetConstant instead
// of imm/Constant.
class TImmLeaf<ValueType vt, code pred, SDNodeXForm xform = NOOP_SDNodeXForm,
SDNode ImmNode = timm> : ImmLeaf<vt, pred, xform, ImmNode>;
// An ImmLeaf except that Imm is an APInt. This is useful when you need to
// zero-extend the immediate instead of sign-extend it.
//

View File

@ -1617,14 +1617,29 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
if (isa<FPMathOperator>(CI))
MIB->copyIRFlags(CI);
for (auto &Arg : CI.arg_operands()) {
for (auto &Arg : enumerate(CI.arg_operands())) {
// Some intrinsics take metadata parameters. Reject them.
if (isa<MetadataAsValue>(Arg))
if (isa<MetadataAsValue>(Arg.value()))
return false;
ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg);
if (VRegs.size() > 1)
return false;
MIB.addUse(VRegs[0]);
// If this is required to be an immediate, don't materialize it in a
// register.
if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Arg.value())) {
// imm arguments are more convenient than cimm (and realistically
// probably sufficient), so use them.
assert(CI->getBitWidth() <= 64 &&
"large intrinsic immediates not handled");
MIB.addImm(CI->getSExtValue());
} else {
MIB.addFPImm(cast<ConstantFP>(Arg.value()));
}
} else {
ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg.value());
if (VRegs.size() > 1)
return false;
MIB.addUse(VRegs[0]);
}
}
// Add a MachineMemOperand if it is a target mem intrinsic.

View File

@ -373,6 +373,13 @@ LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
return TRI->getSubRegIndexLaneMask(SubReg);
}
bool ScheduleDAGInstrs::deadDefHasNoUse(const MachineOperand &MO) {
auto RegUse = CurrentVRegUses.find(MO.getReg());
if (RegUse == CurrentVRegUses.end())
return true;
return (RegUse->LaneMask & getLaneMaskForMO(MO)).none();
}
/// Adds register output and data dependencies from this SUnit to instructions
/// that occur later in the same scheduling region if they read from or write to
/// the virtual register defined at OperIdx.
@ -402,8 +409,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
}
if (MO.isDead()) {
assert(CurrentVRegUses.find(Reg) == CurrentVRegUses.end() &&
"Dead defs should have no uses");
assert(deadDefHasNoUse(MO) && "Dead defs should have no uses");
} else {
// Add data dependence to all uses we found so far.
const TargetSubtargetInfo &ST = MF.getSubtarget();

View File

@ -4768,8 +4768,22 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// Add all operands of the call to the operand list.
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
SDValue Op = getValue(I.getArgOperand(i));
Ops.push_back(Op);
const Value *Arg = I.getArgOperand(i);
if (!I.paramHasAttr(i, Attribute::ImmArg)) {
Ops.push_back(getValue(Arg));
continue;
}
// Use TargetConstant instead of a regular constant for immarg.
EVT VT = TLI.getValueType(*DL, Arg->getType(), true);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Arg)) {
assert(CI->getBitWidth() <= 64 &&
"large intrinsic immediates not handled");
Ops.push_back(DAG.getTargetConstant(*CI, SDLoc(), VT));
} else {
Ops.push_back(
DAG.getTargetConstantFP(*cast<ConstantFP>(Arg), SDLoc(), VT));
}
}
SmallVector<EVT, 4> ValueVTs;

View File

@ -685,11 +685,11 @@ def logical_imm64_not : Operand<i64> {
// iXX_imm0_65535 predicates - True if the immediate is in the range [0,65535].
let ParserMatchClass = AsmImmRange<0, 65535>, PrintMethod = "printImmHex" in {
def i32_imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
def i32_imm0_65535 : Operand<i32>, TImmLeaf<i32, [{
return ((uint32_t)Imm) < 65536;
}]>;
def i64_imm0_65535 : Operand<i64>, ImmLeaf<i64, [{
def i64_imm0_65535 : Operand<i64>, TImmLeaf<i64, [{
return ((uint64_t)Imm) < 65536;
}]>;
}

View File

@ -798,7 +798,7 @@ def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins),
let Uses = [ X9 ], Defs = [ X16, X17, LR, NZCV ] in {
def HWASAN_CHECK_MEMACCESS : Pseudo<
(outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
[(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 imm:$accessinfo))]>,
[(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
Sched<[]>;
}

View File

@ -22,6 +22,7 @@
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@ -245,10 +246,6 @@ AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO,
}
}
static int64_t getConstant(const MachineInstr *MI) {
return MI->getOperand(1).getCImm()->getSExtValue();
}
static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64) {
switch (Opc) {
case AMDGPU::G_AND:
@ -737,6 +734,260 @@ buildEXP(const TargetInstrInfo &TII, MachineInstr *Insert, unsigned Tgt,
.addImm(Enabled);
}
static bool isZero(Register Reg, MachineRegisterInfo &MRI) {
int64_t C;
if (mi_match(Reg, MRI, m_ICst(C)) && C == 0)
return true;
// FIXME: matcher should ignore copies
return mi_match(Reg, MRI, m_Copy(m_ICst(C))) && C == 0;
}
static unsigned extractGLC(unsigned CachePolicy) {
return CachePolicy & 1;
}
static unsigned extractSLC(unsigned CachePolicy) {
return (CachePolicy >> 1) & 1;
}
static unsigned extractDLC(unsigned CachePolicy) {
return (CachePolicy >> 2) & 1;
}
// Returns Base register, constant offset, and offset def point.
static std::tuple<Register, unsigned, MachineInstr *>
getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
if (!Def)
return {Reg, 0, nullptr};
if (Def->getOpcode() == AMDGPU::G_CONSTANT) {
unsigned Offset;
const MachineOperand &Op = Def->getOperand(1);
if (Op.isImm())
Offset = Op.getImm();
else
Offset = Op.getCImm()->getZExtValue();
return {Register(), Offset, Def};
}
int64_t Offset;
if (Def->getOpcode() == AMDGPU::G_ADD) {
// TODO: Handle G_OR used for add case
if (mi_match(Def->getOperand(1).getReg(), MRI, m_ICst(Offset)))
return {Def->getOperand(0).getReg(), Offset, Def};
// FIXME: matcher should ignore copies
if (mi_match(Def->getOperand(1).getReg(), MRI, m_Copy(m_ICst(Offset))))
return {Def->getOperand(0).getReg(), Offset, Def};
}
return {Reg, 0, Def};
}
static unsigned getBufferStoreOpcode(LLT Ty,
const unsigned MemSize,
const bool Offen) {
const int Size = Ty.getSizeInBits();
switch (8 * MemSize) {
case 8:
return Offen ? AMDGPU::BUFFER_STORE_BYTE_OFFEN_exact :
AMDGPU::BUFFER_STORE_BYTE_OFFSET_exact;
case 16:
return Offen ? AMDGPU::BUFFER_STORE_SHORT_OFFEN_exact :
AMDGPU::BUFFER_STORE_SHORT_OFFSET_exact;
default:
unsigned Opc = Offen ? AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact :
AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact;
if (Size > 32)
Opc = AMDGPU::getMUBUFOpcode(Opc, Size / 32);
return Opc;
}
}
static unsigned getBufferStoreFormatOpcode(LLT Ty,
const unsigned MemSize,
const bool Offen) {
bool IsD16Packed = Ty.getScalarSizeInBits() == 16;
bool IsD16Unpacked = 8 * MemSize < Ty.getSizeInBits();
int NumElts = Ty.isVector() ? Ty.getNumElements() : 1;
if (IsD16Packed) {
switch (NumElts) {
case 1:
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact :
AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact;
case 2:
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact :
AMDGPU::BUFFER_STORE_FORMAT_D16_XY_OFFSET_exact;
case 3:
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFEN_exact :
AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_OFFSET_exact;
case 4:
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact :
AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_OFFSET_exact;
default:
return -1;
}
}
if (IsD16Unpacked) {
switch (NumElts) {
case 1:
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFEN_exact :
AMDGPU::BUFFER_STORE_FORMAT_D16_X_OFFSET_exact;
case 2:
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact :
AMDGPU::BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFSET_exact;
case 3:
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFEN_exact :
AMDGPU::BUFFER_STORE_FORMAT_D16_XYZ_gfx80_OFFSET_exact;
case 4:
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact :
AMDGPU::BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFSET_exact;
default:
return -1;
}
}
switch (NumElts) {
case 1:
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_X_OFFEN_exact :
AMDGPU::BUFFER_STORE_FORMAT_X_OFFSET_exact;
case 2:
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XY_OFFEN_exact :
AMDGPU::BUFFER_STORE_FORMAT_XY_OFFSET_exact;
case 3:
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFEN_exact :
AMDGPU::BUFFER_STORE_FORMAT_XYZ_OFFSET_exact;
case 4:
return Offen ? AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFEN_exact :
AMDGPU::BUFFER_STORE_FORMAT_XYZW_OFFSET_exact;
default:
return -1;
}
llvm_unreachable("unhandled buffer store");
}
// TODO: Move this to combiner
// Returns base register, imm offset, total constant offset.
std::tuple<Register, unsigned, unsigned>
AMDGPUInstructionSelector::splitBufferOffsets(MachineIRBuilder &B,
Register OrigOffset) const {
const unsigned MaxImm = 4095;
Register BaseReg;
unsigned TotalConstOffset;
MachineInstr *OffsetDef;
MachineRegisterInfo &MRI = *B.getMRI();
std::tie(BaseReg, TotalConstOffset, OffsetDef)
= getBaseWithConstantOffset(MRI, OrigOffset);
unsigned ImmOffset = TotalConstOffset;
// If the immediate value is too big for the immoffset field, put the value
// and -4096 into the immoffset field so that the value that is copied/added
// for the voffset field is a multiple of 4096, and it stands more chance
// of being CSEd with the copy/add for another similar load/store.f
// However, do not do that rounding down to a multiple of 4096 if that is a
// negative number, as it appears to be illegal to have a negative offset
// in the vgpr, even if adding the immediate offset makes it positive.
unsigned Overflow = ImmOffset & ~MaxImm;
ImmOffset -= Overflow;
if ((int32_t)Overflow < 0) {
Overflow += ImmOffset;
ImmOffset = 0;
}
if (Overflow != 0) {
// In case this is in a waterfall loop, insert offset code at the def point
// of the offset, not inside the loop.
MachineBasicBlock::iterator OldInsPt = B.getInsertPt();
MachineBasicBlock &OldMBB = B.getMBB();
B.setInstr(*OffsetDef);
if (!BaseReg) {
BaseReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
B.buildInstr(AMDGPU::V_MOV_B32_e32)
.addDef(BaseReg)
.addImm(Overflow);
} else {
Register OverflowVal = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
B.buildInstr(AMDGPU::V_MOV_B32_e32)
.addDef(OverflowVal)
.addImm(Overflow);
Register NewBaseReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
TII.getAddNoCarry(B.getMBB(), B.getInsertPt(), B.getDebugLoc(), NewBaseReg)
.addReg(BaseReg)
.addReg(OverflowVal, RegState::Kill)
.addImm(0);
BaseReg = NewBaseReg;
}
B.setInsertPt(OldMBB, OldInsPt);
}
return {BaseReg, ImmOffset, TotalConstOffset};
}
bool AMDGPUInstructionSelector::selectStoreIntrinsic(MachineInstr &MI,
bool IsFormat) const {
MachineIRBuilder B(MI);
MachineRegisterInfo &MRI = *B.getMRI();
MachineFunction &MF = B.getMF();
Register VData = MI.getOperand(1).getReg();
LLT Ty = MRI.getType(VData);
int Size = Ty.getSizeInBits();
if (Size % 32 != 0)
return false;
// FIXME: Verifier should enforce 1 MMO for these intrinsics.
MachineMemOperand *MMO = *MI.memoperands_begin();
const int MemSize = MMO->getSize();
Register RSrc = MI.getOperand(2).getReg();
Register VOffset = MI.getOperand(3).getReg();
Register SOffset = MI.getOperand(4).getReg();
unsigned CachePolicy = MI.getOperand(5).getImm();
unsigned ImmOffset;
unsigned TotalOffset;
std::tie(VOffset, ImmOffset, TotalOffset) = splitBufferOffsets(B, VOffset);
if (TotalOffset != 0)
MMO = MF.getMachineMemOperand(MMO, TotalOffset, MemSize);
const bool Offen = !isZero(VOffset, MRI);
int Opc = IsFormat ? getBufferStoreFormatOpcode(Ty, MemSize, Offen) :
getBufferStoreOpcode(Ty, MemSize, Offen);
if (Opc == -1)
return false;
MachineInstrBuilder MIB = B.buildInstr(Opc)
.addUse(VData);
if (Offen)
MIB.addUse(VOffset);
MIB.addUse(RSrc)
.addUse(SOffset)
.addImm(ImmOffset)
.addImm(extractGLC(CachePolicy))
.addImm(extractSLC(CachePolicy))
.addImm(0) // tfe: FIXME: Remove from inst
.addImm(extractDLC(CachePolicy))
.addMemOperand(MMO);
MI.eraseFromParent();
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
@ -746,10 +997,10 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
unsigned IntrinsicID = I.getIntrinsicID();
switch (IntrinsicID) {
case Intrinsic::amdgcn_exp: {
int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(7).getReg()));
int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(8).getReg()));
int64_t Tgt = I.getOperand(1).getImm();
int64_t Enabled = I.getOperand(2).getImm();
int64_t Done = I.getOperand(7).getImm();
int64_t VM = I.getOperand(8).getImm();
MachineInstr *Exp = buildEXP(TII, &I, Tgt, I.getOperand(3).getReg(),
I.getOperand(4).getReg(),
@ -762,13 +1013,13 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
}
case Intrinsic::amdgcn_exp_compr: {
const DebugLoc &DL = I.getDebugLoc();
int64_t Tgt = getConstant(MRI.getVRegDef(I.getOperand(1).getReg()));
int64_t Enabled = getConstant(MRI.getVRegDef(I.getOperand(2).getReg()));
int64_t Tgt = I.getOperand(1).getImm();
int64_t Enabled = I.getOperand(2).getImm();
Register Reg0 = I.getOperand(3).getReg();
Register Reg1 = I.getOperand(4).getReg();
Register Undef = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
int64_t Done = getConstant(MRI.getVRegDef(I.getOperand(5).getReg()));
int64_t VM = getConstant(MRI.getVRegDef(I.getOperand(6).getReg()));
int64_t Done = I.getOperand(5).getImm();
int64_t VM = I.getOperand(6).getImm();
BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Undef);
MachineInstr *Exp = buildEXP(TII, &I, Tgt, Reg0, Reg1, Undef, Undef, VM,
@ -791,6 +1042,10 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
MRI.setRegClass(Reg, TRI.getWaveMaskRegClass());
return true;
}
case Intrinsic::amdgcn_raw_buffer_store:
return selectStoreIntrinsic(I, false);
case Intrinsic::amdgcn_raw_buffer_store_format:
return selectStoreIntrinsic(I, true);
default:
return selectImpl(I, *CoverageInfo);
}

View File

@ -35,6 +35,7 @@ class AMDGPUInstrInfo;
class AMDGPURegisterBankInfo;
class GCNSubtarget;
class MachineInstr;
class MachineIRBuilder;
class MachineOperand;
class MachineRegisterInfo;
class SIInstrInfo;
@ -82,6 +83,12 @@ private:
bool selectG_IMPLICIT_DEF(MachineInstr &I) const;
bool selectG_INSERT(MachineInstr &I) const;
bool selectG_INTRINSIC(MachineInstr &I) const;
std::tuple<Register, unsigned, unsigned>
splitBufferOffsets(MachineIRBuilder &B, Register OrigOffset) const;
bool selectStoreIntrinsic(MachineInstr &MI, bool IsFormat) const;
bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I) const;
int getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) const;
bool selectG_ICMP(MachineInstr &I) const;

View File

@ -1751,6 +1751,62 @@ bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI,
return true;
}
/// Handle register layout difference for f16 images for some subtargets.
Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
MachineRegisterInfo &MRI,
Register Reg) const {
if (!ST.hasUnpackedD16VMem())
return Reg;
const LLT S16 = LLT::scalar(16);
const LLT S32 = LLT::scalar(32);
LLT StoreVT = MRI.getType(Reg);
assert(StoreVT.isVector() && StoreVT.getElementType() == S16);
auto Unmerge = B.buildUnmerge(S16, Reg);
SmallVector<Register, 4> WideRegs;
for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
WideRegs.push_back(B.buildAnyExt(S32, Unmerge.getReg(I)).getReg(0));
int NumElts = StoreVT.getNumElements();
return B.buildBuildVector(LLT::vector(NumElts, S32), WideRegs).getReg(0);
}
bool AMDGPULegalizerInfo::legalizeRawBufferStore(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B,
bool IsFormat) const {
// TODO: Reject f16 format on targets where unsupported.
Register VData = MI.getOperand(1).getReg();
LLT Ty = MRI.getType(VData);
B.setInstr(MI);
const LLT S32 = LLT::scalar(32);
const LLT S16 = LLT::scalar(16);
// Fixup illegal register types for i8 stores.
if (Ty == LLT::scalar(8) || Ty == S16) {
Register AnyExt = B.buildAnyExt(LLT::scalar(32), VData).getReg(0);
MI.getOperand(1).setReg(AnyExt);
return true;
}
if (Ty.isVector()) {
if (Ty.getElementType() == S16 && Ty.getNumElements() <= 4) {
if (IsFormat)
MI.getOperand(1).setReg(handleD16VData(B, MRI, VData));
return true;
}
return Ty.getElementType() == S32 && Ty.getNumElements() <= 4;
}
return Ty == S32;
}
bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
@ -1843,6 +1899,10 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
MI.eraseFromParent();
return true;
}
case Intrinsic::amdgcn_raw_buffer_store:
return legalizeRawBufferStore(MI, MRI, B, false);
case Intrinsic::amdgcn_raw_buffer_store_format:
return legalizeRawBufferStore(MI, MRI, B, true);
default:
return true;
}

View File

@ -83,6 +83,11 @@ public:
MachineIRBuilder &B) const;
bool legalizeIsAddrSpace(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B, unsigned AddrSpace) const;
Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
Register Reg) const;
bool legalizeRawBufferStore(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B, bool IsFormat) const;
bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const override;

View File

@ -20,6 +20,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@ -33,6 +34,7 @@
#include "AMDGPUGenRegisterBankInfo.def"
using namespace llvm;
using namespace MIPatternMatch;
namespace {
@ -84,9 +86,11 @@ public:
};
}
AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI)
AMDGPURegisterBankInfo::AMDGPURegisterBankInfo(const GCNSubtarget &ST)
: AMDGPUGenRegisterBankInfo(),
TRI(static_cast<const SIRegisterInfo*>(&TRI)) {
Subtarget(ST),
TRI(Subtarget.getRegisterInfo()),
TII(Subtarget.getInstrInfo()) {
// HACK: Until this is fully tablegen'd.
static bool AlreadyInit = false;
@ -638,8 +642,10 @@ static LLT getHalfSizedType(LLT Ty) {
///
/// There is additional complexity to try for compare values to identify the
/// unique values used.
void AMDGPURegisterBankInfo::executeInWaterfallLoop(
MachineInstr &MI, MachineRegisterInfo &MRI,
bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
MachineIRBuilder &B,
MachineInstr &MI,
MachineRegisterInfo &MRI,
ArrayRef<unsigned> OpIndices) const {
MachineFunction *MF = MI.getParent()->getParent();
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
@ -662,9 +668,8 @@ void AMDGPURegisterBankInfo::executeInWaterfallLoop(
// No operands need to be replaced, so no need to loop.
if (SGPROperandRegs.empty())
return;
return false;
MachineIRBuilder B(MI);
SmallVector<Register, 4> ResultRegs;
SmallVector<Register, 4> InitResultRegs;
SmallVector<Register, 4> PhiRegs;
@ -922,6 +927,18 @@ void AMDGPURegisterBankInfo::executeInWaterfallLoop(
B.buildInstr(AMDGPU::S_MOV_B64_term)
.addDef(AMDGPU::EXEC)
.addReg(SaveExecReg);
// Restore the insert point before the original instruction.
B.setInsertPt(MBB, MBB.end());
return true;
}
bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
MachineInstr &MI, MachineRegisterInfo &MRI,
ArrayRef<unsigned> OpIndices) const {
MachineIRBuilder B(MI);
return executeInWaterfallLoop(B, MI, MRI, OpIndices);
}
// Legalize an operand that must be an SGPR by inserting a readfirstlane.
@ -1031,6 +1048,33 @@ bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI,
return true;
}
bool AMDGPURegisterBankInfo::applyMappingImage(
MachineInstr &MI, const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
MachineRegisterInfo &MRI, int RsrcIdx) const {
const int NumDefs = MI.getNumExplicitDefs();
// The reported argument index is relative to the IR intrinsic call arguments,
// so we need to shift by the number of defs and the intrinsic ID.
RsrcIdx += NumDefs + 1;
// Insert copies to VGPR arguments.
applyDefaultMapping(OpdMapper);
// Fixup any SGPR arguments.
SmallVector<unsigned, 4> SGPRIndexes;
for (int I = NumDefs, NumOps = MI.getNumOperands(); I != NumOps; ++I) {
if (!MI.getOperand(I).isReg())
continue;
// If this intrinsic has a sampler, it immediately follows rsrc.
if (I == RsrcIdx || I == RsrcIdx + 1)
SGPRIndexes.push_back(I);
}
executeInWaterfallLoop(MI, MRI, SGPRIndexes);
return true;
}
// For cases where only a single copy is inserted for matching register banks.
// Replace the register in the instruction operand
static void substituteSimpleCopyRegs(
@ -1042,6 +1086,184 @@ static void substituteSimpleCopyRegs(
}
}
/// Handle register layout difference for f16 images for some subtargets.
Register AMDGPURegisterBankInfo::handleD16VData(MachineIRBuilder &B,
MachineRegisterInfo &MRI,
Register Reg) const {
if (!Subtarget.hasUnpackedD16VMem())
return Reg;
const LLT S16 = LLT::scalar(16);
LLT StoreVT = MRI.getType(Reg);
if (!StoreVT.isVector() || StoreVT.getElementType() != S16)
return Reg;
auto Unmerge = B.buildUnmerge(S16, Reg);
SmallVector<Register, 4> WideRegs;
for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
WideRegs.push_back(Unmerge.getReg(I));
const LLT S32 = LLT::scalar(32);
int NumElts = StoreVT.getNumElements();
return B.buildMerge(LLT::vector(NumElts, S32), WideRegs).getReg(0);
}
static std::pair<Register, unsigned>
getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
int64_t Const;
if (mi_match(Reg, MRI, m_ICst(Const)))
return std::make_pair(Register(), Const);
Register Base;
if (mi_match(Reg, MRI, m_GAdd(m_Reg(Base), m_ICst(Const))))
return std::make_pair(Base, Const);
// TODO: Handle G_OR used for add case
return std::make_pair(Reg, 0);
}
std::pair<Register, unsigned>
AMDGPURegisterBankInfo::splitBufferOffsets(MachineIRBuilder &B,
Register OrigOffset) const {
const unsigned MaxImm = 4095;
Register BaseReg;
unsigned ImmOffset;
const LLT S32 = LLT::scalar(32);
std::tie(BaseReg, ImmOffset) = getBaseWithConstantOffset(*B.getMRI(),
OrigOffset);
unsigned C1 = 0;
if (ImmOffset != 0) {
// If the immediate value is too big for the immoffset field, put the value
// and -4096 into the immoffset field so that the value that is copied/added
// for the voffset field is a multiple of 4096, and it stands more chance
// of being CSEd with the copy/add for another similar load/store.
// However, do not do that rounding down to a multiple of 4096 if that is a
// negative number, as it appears to be illegal to have a negative offset
// in the vgpr, even if adding the immediate offset makes it positive.
unsigned Overflow = ImmOffset & ~MaxImm;
ImmOffset -= Overflow;
if ((int32_t)Overflow < 0) {
Overflow += ImmOffset;
ImmOffset = 0;
}
C1 = ImmOffset;
if (Overflow != 0) {
if (!BaseReg)
BaseReg = B.buildConstant(S32, Overflow).getReg(0);
else {
auto OverflowVal = B.buildConstant(S32, Overflow);
BaseReg = B.buildAdd(S32, BaseReg, OverflowVal).getReg(0);
}
}
}
if (!BaseReg)
BaseReg = B.buildConstant(S32, 0).getReg(0);
return {BaseReg, C1};
}
static bool isZero(Register Reg, MachineRegisterInfo &MRI) {
int64_t C;
return mi_match(Reg, MRI, m_ICst(C)) && C == 0;
}
static unsigned extractGLC(unsigned CachePolicy) {
return CachePolicy & 1;
}
static unsigned extractSLC(unsigned CachePolicy) {
return (CachePolicy >> 1) & 1;
}
static unsigned extractDLC(unsigned CachePolicy) {
return (CachePolicy >> 2) & 1;
}
MachineInstr *
AMDGPURegisterBankInfo::selectStoreIntrinsic(MachineIRBuilder &B,
MachineInstr &MI) const {
MachineRegisterInfo &MRI = *B.getMRI();
executeInWaterfallLoop(B, MI, MRI, {2, 4});
// FIXME: DAG lowering brokenly changes opcode based on FP vs. integer.
Register VData = MI.getOperand(1).getReg();
LLT Ty = MRI.getType(VData);
int EltSize = Ty.getScalarSizeInBits();
int Size = Ty.getSizeInBits();
// FIXME: Broken integer truncstore.
if (EltSize != 32)
report_fatal_error("unhandled intrinsic store");
// FIXME: Verifier should enforce 1 MMO for these intrinsics.
const int MemSize = (*MI.memoperands_begin())->getSize();
Register RSrc = MI.getOperand(2).getReg();
Register VOffset = MI.getOperand(3).getReg();
Register SOffset = MI.getOperand(4).getReg();
unsigned CachePolicy = MI.getOperand(5).getImm();
unsigned ImmOffset;
std::tie(VOffset, ImmOffset) = splitBufferOffsets(B, VOffset);
const bool Offen = !isZero(VOffset, MRI);
unsigned Opc = AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact;
switch (8 * MemSize) {
case 8:
Opc = Offen ? AMDGPU::BUFFER_STORE_BYTE_OFFEN_exact :
AMDGPU::BUFFER_STORE_BYTE_OFFSET_exact;
break;
case 16:
Opc = Offen ? AMDGPU::BUFFER_STORE_SHORT_OFFEN_exact :
AMDGPU::BUFFER_STORE_SHORT_OFFSET_exact;
break;
default:
Opc = Offen ? AMDGPU::BUFFER_STORE_DWORD_OFFEN_exact :
AMDGPU::BUFFER_STORE_DWORD_OFFSET_exact;
if (Size > 32)
Opc = AMDGPU::getMUBUFOpcode(Opc, Size / 32);
break;
}
// Set the insertion point back to the instruction in case it was moved into a
// loop.
B.setInstr(MI);
MachineInstrBuilder MIB = B.buildInstr(Opc)
.addUse(VData);
if (Offen)
MIB.addUse(VOffset);
MIB.addUse(RSrc)
.addUse(SOffset)
.addImm(ImmOffset)
.addImm(extractGLC(CachePolicy))
.addImm(extractSLC(CachePolicy))
.addImm(0) // tfe: FIXME: Remove from inst
.addImm(extractDLC(CachePolicy))
.cloneMemRefs(MI);
// FIXME: We need a way to report failure from applyMappingImpl.
// Insert constrain copies before inserting the loop.
if (!constrainSelectedInstRegOperands(*MIB, *TII, *TRI, *this))
report_fatal_error("failed to constrain selected store intrinsic");
return MIB;
}
void AMDGPURegisterBankInfo::applyMappingImpl(
const OperandsMapper &OpdMapper) const {
MachineInstr &MI = OpdMapper.getMI();
@ -1405,7 +1627,8 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
break;
}
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
auto IntrID = MI.getIntrinsicID();
switch (IntrID) {
case Intrinsic::amdgcn_buffer_load: {
executeInWaterfallLoop(MI, MRI, { 2 });
return;
@ -1424,9 +1647,39 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
constrainOpWithReadfirstlane(MI, MRI, 2); // M0
return;
}
default:
case Intrinsic::amdgcn_raw_buffer_load:
case Intrinsic::amdgcn_raw_buffer_load_format:
case Intrinsic::amdgcn_raw_tbuffer_load:
case Intrinsic::amdgcn_raw_buffer_store:
case Intrinsic::amdgcn_raw_buffer_store_format:
case Intrinsic::amdgcn_raw_tbuffer_store: {
applyDefaultMapping(OpdMapper);
executeInWaterfallLoop(MI, MRI, {2, 4});
return;
}
case Intrinsic::amdgcn_struct_buffer_load:
case Intrinsic::amdgcn_struct_buffer_store:
case Intrinsic::amdgcn_struct_tbuffer_load:
case Intrinsic::amdgcn_struct_tbuffer_store: {
applyDefaultMapping(OpdMapper);
executeInWaterfallLoop(MI, MRI, {2, 5});
return;
}
default: {
if (const AMDGPU::RsrcIntrinsic *RSrcIntrin =
AMDGPU::lookupRsrcIntrinsic(IntrID)) {
// Non-images can have complications from operands that allow both SGPR
// and VGPR. For now it's too complicated to figure out the final opcode
// to derive the register bank from the MCInstrDesc.
if (RSrcIntrin->IsImage) {
applyMappingImage(MI, OpdMapper, MRI, RSrcIntrin->RsrcArg);
return;
}
}
break;
}
}
break;
}
case AMDGPU::G_LOAD:
@ -1531,6 +1784,45 @@ AMDGPURegisterBankInfo::getDefaultMappingAllVGPR(const MachineInstr &MI) const {
MI.getNumOperands());
}
const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getImageMapping(const MachineRegisterInfo &MRI,
const MachineInstr &MI,
int RsrcIdx) const {
// The reported argument index is relative to the IR intrinsic call arguments,
// so we need to shift by the number of defs and the intrinsic ID.
RsrcIdx += MI.getNumExplicitDefs() + 1;
const int NumOps = MI.getNumOperands();
SmallVector<const ValueMapping *, 8> OpdsMapping(NumOps);
// TODO: Should packed/unpacked D16 difference be reported here as part of
// the value mapping?
for (int I = 0; I != NumOps; ++I) {
if (!MI.getOperand(I).isReg())
continue;
Register OpReg = MI.getOperand(I).getReg();
unsigned Size = getSizeInBits(OpReg, MRI, *TRI);
// FIXME: Probably need a new intrinsic register bank searchable table to
// handle arbitrary intrinsics easily.
//
// If this has a sampler, it immediately follows rsrc.
const bool MustBeSGPR = I == RsrcIdx || I == RsrcIdx + 1;
if (MustBeSGPR) {
// If this must be an SGPR, so we must report whatever it is as legal.
unsigned NewBank = getRegBankID(OpReg, MRI, *TRI, AMDGPU::SGPRRegBankID);
OpdsMapping[I] = AMDGPU::getValueMapping(NewBank, Size);
} else {
// Some operands must be VGPR, and these are easy to copy to.
OpdsMapping[I] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
}
}
return getInstructionMapping(1, 1, getOperandsMapping(OpdsMapping), NumOps);
}
const RegisterBankInfo::InstructionMapping &
AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
@ -1577,11 +1869,31 @@ AMDGPURegisterBankInfo::getRegBankID(Register Reg,
return Bank ? Bank->getID() : Default;
}
static unsigned regBankUnion(unsigned RB0, unsigned RB1) {
return (RB0 == AMDGPU::SGPRRegBankID && RB1 == AMDGPU::SGPRRegBankID) ?
AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
}
const RegisterBankInfo::ValueMapping *
AMDGPURegisterBankInfo::getSGPROpMapping(Register Reg,
const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const {
// Lie and claim anything is legal, even though this needs to be an SGPR
// applyMapping will have to deal with it as a waterfall loop.
unsigned Bank = getRegBankID(Reg, MRI, TRI, AMDGPU::SGPRRegBankID);
unsigned Size = getSizeInBits(Reg, MRI, TRI);
return AMDGPU::getValueMapping(Bank, Size);
}
const RegisterBankInfo::ValueMapping *
AMDGPURegisterBankInfo::getVGPROpMapping(Register Reg,
const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const {
unsigned Size = getSizeInBits(Reg, MRI, TRI);
return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
}
///
/// This function must return a legal mapping, because
/// AMDGPURegisterBankInfo::getInstrAlternativeMappings() is not called
@ -1748,7 +2060,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
LLVM_FALLTHROUGH;
}
case AMDGPU::G_GEP:
case AMDGPU::G_ADD:
case AMDGPU::G_SUB:
@ -1764,8 +2075,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_SADDE:
case AMDGPU::G_USUBE:
case AMDGPU::G_SSUBE:
case AMDGPU::G_UMULH:
case AMDGPU::G_SMULH:
case AMDGPU::G_SMIN:
case AMDGPU::G_SMAX:
case AMDGPU::G_UMIN:
@ -1799,6 +2108,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_INTRINSIC_TRUNC:
case AMDGPU::G_INTRINSIC_ROUND:
return getDefaultMappingVOP(MI);
case AMDGPU::G_UMULH:
case AMDGPU::G_SMULH: {
if (MF.getSubtarget<GCNSubtarget>().hasScalarMulHiInsts() &&
isSALUMapping(MI))
return getDefaultMappingSOP(MI);
return getDefaultMappingVOP(MI);
}
case AMDGPU::G_IMPLICIT_DEF: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
@ -2072,6 +2388,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_wwm:
case Intrinsic::amdgcn_wqm:
return getDefaultMappingVOP(MI);
case Intrinsic::amdgcn_ds_swizzle:
case Intrinsic::amdgcn_ds_permute:
case Intrinsic::amdgcn_ds_bpermute:
case Intrinsic::amdgcn_update_dpp:
@ -2193,9 +2510,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: {
switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
default:
return getInvalidInstructionMapping();
auto IntrID = MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID();
switch (IntrID) {
case Intrinsic::amdgcn_s_getreg:
case Intrinsic::amdgcn_s_memtime:
case Intrinsic::amdgcn_s_memrealtime:
@ -2235,18 +2551,11 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
break;
case Intrinsic::amdgcn_exp:
OpdsMapping[0] = nullptr; // IntrinsicID
// FIXME: These are immediate values which can't be read from registers.
OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
// FIXME: Could we support packed types here?
OpdsMapping[3] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
OpdsMapping[4] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
OpdsMapping[5] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
// FIXME: These are immediate values which can't be read from registers.
OpdsMapping[7] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
OpdsMapping[8] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
break;
case Intrinsic::amdgcn_buffer_load: {
Register RSrc = MI.getOperand(2).getReg(); // SGPR
@ -2298,6 +2607,54 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
break;
}
case Intrinsic::amdgcn_raw_buffer_load:
case Intrinsic::amdgcn_raw_tbuffer_load: {
// FIXME: Should make intrinsic ID the last operand of the instruction,
// then this would be the same as store
OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
break;
}
case Intrinsic::amdgcn_raw_buffer_store:
case Intrinsic::amdgcn_raw_buffer_store_format:
case Intrinsic::amdgcn_raw_tbuffer_store: {
OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
break;
}
case Intrinsic::amdgcn_struct_buffer_load:
case Intrinsic::amdgcn_struct_tbuffer_load: {
OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
break;
}
case Intrinsic::amdgcn_struct_buffer_store:
case Intrinsic::amdgcn_struct_tbuffer_store: {
OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
break;
}
default:
if (const AMDGPU::RsrcIntrinsic *RSrcIntrin =
AMDGPU::lookupRsrcIntrinsic(IntrID)) {
// Non-images can have complications from operands that allow both SGPR
// and VGPR. For now it's too complicated to figure out the final opcode
// to derive the register bank from the MCInstrDesc.
if (RSrcIntrin->IsImage)
return getImageMapping(MRI, MI, RSrcIntrin->RsrcArg);
}
return getInvalidInstructionMapping();
}
break;
}

View File

@ -23,7 +23,9 @@
namespace llvm {
class LLT;
class GCNSubtarget;
class MachineIRBuilder;
class SIInstrInfo;
class SIRegisterInfo;
class TargetRegisterInfo;
@ -36,9 +38,15 @@ protected:
#include "AMDGPUGenRegisterBank.inc"
};
class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
const GCNSubtarget &Subtarget;
const SIRegisterInfo *TRI;
const SIInstrInfo *TII;
void executeInWaterfallLoop(MachineInstr &MI,
bool executeInWaterfallLoop(MachineIRBuilder &B,
MachineInstr &MI,
MachineRegisterInfo &MRI,
ArrayRef<unsigned> OpIndices) const;
bool executeInWaterfallLoop(MachineInstr &MI,
MachineRegisterInfo &MRI,
ArrayRef<unsigned> OpIndices) const;
@ -47,6 +55,19 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
bool applyMappingWideLoad(MachineInstr &MI,
const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
MachineRegisterInfo &MRI) const;
bool
applyMappingImage(MachineInstr &MI,
const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
MachineRegisterInfo &MRI, int RSrcIdx) const;
Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
Register Reg) const;
std::pair<Register, unsigned>
splitBufferOffsets(MachineIRBuilder &B, Register Offset) const;
MachineInstr *selectStoreIntrinsic(MachineIRBuilder &B,
MachineInstr &MI) const;
/// See RegisterBankInfo::applyMapping.
void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
@ -58,6 +79,16 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
const TargetRegisterInfo &TRI,
unsigned Default = AMDGPU::VGPRRegBankID) const;
// Return a value mapping for an operand that is required to be an SGPR.
const ValueMapping *getSGPROpMapping(Register Reg,
const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const;
// Return a value mapping for an operand that is required to be a VGPR.
const ValueMapping *getVGPROpMapping(Register Reg,
const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const;
/// Split 64-bit value \p Reg into two 32-bit halves and populate them into \p
/// Regs. This appropriately sets the regbank of the new registers.
void split64BitValueForMapping(MachineIRBuilder &B,
@ -90,8 +121,13 @@ class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
const InstructionMapping &getDefaultMappingVOP(const MachineInstr &MI) const;
const InstructionMapping &getDefaultMappingAllVGPR(
const MachineInstr &MI) const;
const InstructionMapping &getImageMapping(const MachineRegisterInfo &MRI,
const MachineInstr &MI,
int RsrcIdx) const;
public:
AMDGPURegisterBankInfo(const TargetRegisterInfo &TRI);
AMDGPURegisterBankInfo(const GCNSubtarget &STI);
unsigned copyCost(const RegisterBank &A, const RegisterBank &B,
unsigned Size) const override;

View File

@ -283,7 +283,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
MaxWavesPerEU = AMDGPU::IsaInfo::getMaxWavesPerEU(this);
CallLoweringInfo.reset(new AMDGPUCallLowering(*getTargetLowering()));
Legalizer.reset(new AMDGPULegalizerInfo(*this, TM));
RegBankInfo.reset(new AMDGPURegisterBankInfo(*getRegisterInfo()));
RegBankInfo.reset(new AMDGPURegisterBankInfo(*this));
InstSelector.reset(new AMDGPUInstructionSelector(
*this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()), TM));
}

View File

@ -555,6 +555,10 @@ public:
return GFX9Insts;
}
bool hasScalarMulHiInsts() const {
return GFX9Insts;
}
TrapHandlerAbi getTrapHandlerAbi() const {
return isAmdHsaOS() ? TrapHandlerAbiHsa : TrapHandlerAbiNone;
}

View File

@ -1135,29 +1135,29 @@ def extract_dlc : SDNodeXForm<imm, [{
multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
(vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
imm:$cachepolicy, 0)),
(vt (name v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
timm:$cachepolicy, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
(vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
imm:$cachepolicy, 0)),
(vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
timm:$cachepolicy, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
(vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
imm:$cachepolicy, imm)),
(vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
timm:$cachepolicy, timm)),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
(vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
imm:$cachepolicy, imm)),
(vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset,
timm:$cachepolicy, timm)),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
$rsrc, $soffset, (as_i16imm $offset),
@ -1210,31 +1210,31 @@ defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_ushort, i32, "BUFFER_LOAD_USHORT">;
multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
imm:$cachepolicy, 0),
(name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
timm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset),
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
imm:$cachepolicy, 0),
(name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
timm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
(as_i16imm $offset), (extract_glc $cachepolicy),
(extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
imm:$cachepolicy, imm),
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
timm:$cachepolicy, timm),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
(as_i16imm $offset), (extract_glc $cachepolicy),
(extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
imm:$cachepolicy, imm),
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset,
timm:$cachepolicy, timm),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN_exact)
$vdata,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
@ -1291,32 +1291,32 @@ multiclass BufferAtomicPatterns<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
(vt (name vt:$vdata_in, v4i32:$rsrc, 0,
0, i32:$soffset, imm:$offset,
imm:$cachepolicy, 0)),
0, i32:$soffset, timm:$offset,
timm:$cachepolicy, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET_RTN) $vdata_in, $rsrc, $soffset,
(as_i16imm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
(vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
0, i32:$soffset, imm:$offset,
imm:$cachepolicy, imm)),
0, i32:$soffset, timm:$offset,
timm:$cachepolicy, timm)),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN_RTN) $vdata_in, $vindex, $rsrc, $soffset,
(as_i16imm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
(vt (name vt:$vdata_in, v4i32:$rsrc, 0,
i32:$voffset, i32:$soffset, imm:$offset,
imm:$cachepolicy, 0)),
i32:$voffset, i32:$soffset, timm:$offset,
timm:$cachepolicy, 0)),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN_RTN) $vdata_in, $voffset, $rsrc, $soffset,
(as_i16imm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
(vt (name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
i32:$voffset, i32:$soffset, imm:$offset,
imm:$cachepolicy, imm)),
i32:$voffset, i32:$soffset, timm:$offset,
timm:$cachepolicy, timm)),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN_RTN)
$vdata_in,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
@ -1353,32 +1353,32 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
(name vt:$vdata_in, v4i32:$rsrc, 0,
0, i32:$soffset, imm:$offset,
imm:$cachepolicy, 0),
0, i32:$soffset, timm:$offset,
timm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET) $vdata_in, $rsrc, $soffset,
(as_i16imm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
(name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
0, i32:$soffset, imm:$offset,
imm:$cachepolicy, imm),
0, i32:$soffset, timm:$offset,
timm:$cachepolicy, timm),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vdata_in, $vindex, $rsrc, $soffset,
(as_i16imm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
(name vt:$vdata_in, v4i32:$rsrc, 0,
i32:$voffset, i32:$soffset, imm:$offset,
imm:$cachepolicy, 0),
i32:$voffset, i32:$soffset, timm:$offset,
timm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN) $vdata_in, $voffset, $rsrc, $soffset,
(as_i16imm $offset), (extract_slc $cachepolicy))
>;
def : GCNPat<
(name vt:$vdata_in, v4i32:$rsrc, i32:$vindex,
i32:$voffset, i32:$soffset, imm:$offset,
imm:$cachepolicy, imm),
i32:$voffset, i32:$soffset, timm:$offset,
timm:$cachepolicy, timm),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
$vdata_in,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
@ -1392,8 +1392,8 @@ defm : BufferAtomicPatterns_NO_RTN<SIbuffer_atomic_pk_fadd, v2f16, "BUFFER_ATOMI
def : GCNPat<
(SIbuffer_atomic_cmpswap
i32:$data, i32:$cmp, v4i32:$rsrc, 0,
0, i32:$soffset, imm:$offset,
imm:$cachepolicy, 0),
0, i32:$soffset, timm:$offset,
timm:$cachepolicy, 0),
(EXTRACT_SUBREG
(BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
@ -1404,8 +1404,8 @@ def : GCNPat<
def : GCNPat<
(SIbuffer_atomic_cmpswap
i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
0, i32:$soffset, imm:$offset,
imm:$cachepolicy, imm),
0, i32:$soffset, timm:$offset,
timm:$cachepolicy, timm),
(EXTRACT_SUBREG
(BUFFER_ATOMIC_CMPSWAP_IDXEN_RTN
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
@ -1416,8 +1416,8 @@ def : GCNPat<
def : GCNPat<
(SIbuffer_atomic_cmpswap
i32:$data, i32:$cmp, v4i32:$rsrc, 0,
i32:$voffset, i32:$soffset, imm:$offset,
imm:$cachepolicy, 0),
i32:$voffset, i32:$soffset, timm:$offset,
timm:$cachepolicy, 0),
(EXTRACT_SUBREG
(BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
@ -1428,8 +1428,8 @@ def : GCNPat<
def : GCNPat<
(SIbuffer_atomic_cmpswap
i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
i32:$voffset, i32:$soffset, imm:$offset,
imm:$cachepolicy, imm),
i32:$voffset, i32:$soffset, timm:$offset,
timm:$cachepolicy, timm),
(EXTRACT_SUBREG
(BUFFER_ATOMIC_CMPSWAP_BOTHEN_RTN
(REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
@ -1642,32 +1642,32 @@ defm : MUBUFScratchStorePat <BUFFER_STORE_BYTE_D16_HI_OFFEN, BUFFER_STORE_BYTE_D
multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
(vt (name v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
imm:$format, imm:$cachepolicy, 0)),
(vt (name v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
timm:$format, timm:$cachepolicy, 0)),
(!cast<MTBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
(as_i8imm $format),
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
(vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
imm:$format, imm:$cachepolicy, imm)),
(vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
timm:$format, timm:$cachepolicy, timm)),
(!cast<MTBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
(as_i8imm $format),
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
(vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
imm:$format, imm:$cachepolicy, 0)),
(vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
timm:$format, timm:$cachepolicy, 0)),
(!cast<MTBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
(as_i8imm $format),
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
(vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, imm:$offset,
imm:$format, imm:$cachepolicy, imm)),
(vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset,
timm:$format, timm:$cachepolicy, timm)),
(!cast<MTBUF_Pseudo>(opcode # _BOTHEN)
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
$rsrc, $soffset, (as_i16imm $offset),
@ -1700,24 +1700,24 @@ let SubtargetPredicate = HasPackedD16VMem in {
multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
string opcode> {
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, imm:$offset,
imm:$format, imm:$cachepolicy, 0),
(name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
timm:$format, timm:$cachepolicy, 0),
(!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset,
(as_i16imm $offset), (as_i8imm $format),
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, imm:$offset,
imm:$format, imm:$cachepolicy, imm),
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
timm:$format, timm:$cachepolicy, timm),
(!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
(as_i16imm $offset), (as_i8imm $format),
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
>;
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, imm:$offset,
imm:$format, imm:$cachepolicy, 0),
(name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
timm:$format, timm:$cachepolicy, 0),
(!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
(as_i16imm $offset), (as_i8imm $format),
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
@ -1725,7 +1725,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
def : GCNPat<
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset,
imm:$offset, imm:$format, imm:$cachepolicy, imm),
timm:$offset, timm:$format, timm:$cachepolicy, timm),
(!cast<MTBUF_Pseudo>(opcode # _BOTHEN_exact)
$vdata,
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),

View File

@ -603,7 +603,7 @@ def DS_ADD_SRC2_F32 : DS_1A<"ds_add_src2_f32">;
//===----------------------------------------------------------------------===//
def : GCNPat <
(int_amdgcn_ds_swizzle i32:$src, imm:$offset16),
(int_amdgcn_ds_swizzle i32:$src, timm:$offset16),
(DS_SWIZZLE_B32 $src, (as_i16imm $offset16), (i1 0))
>;

View File

@ -5666,14 +5666,14 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
SDVTList VTList = DAG.getVTList({LoadVT, MVT::Glue});
unsigned CachePolicy = cast<ConstantSDNode>(GLC)->getZExtValue();
SDValue Ops[] = {
DAG.getEntryNode(), // Chain
Rsrc, // rsrc
DAG.getConstant(0, DL, MVT::i32), // vindex
{}, // voffset
{}, // soffset
{}, // offset
DAG.getConstant(CachePolicy, DL, MVT::i32), // cachepolicy
DAG.getConstant(0, DL, MVT::i1), // idxen
DAG.getEntryNode(), // Chain
Rsrc, // rsrc
DAG.getConstant(0, DL, MVT::i32), // vindex
{}, // voffset
{}, // soffset
{}, // offset
DAG.getTargetConstant(CachePolicy, DL, MVT::i32), // cachepolicy
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
// Use the alignment to ensure that the required offsets will fit into the
@ -5682,7 +5682,7 @@ SDValue SITargetLowering::lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc,
uint64_t InstOffset = cast<ConstantSDNode>(Ops[5])->getZExtValue();
for (unsigned i = 0; i < NumLoads; ++i) {
Ops[5] = DAG.getConstant(InstOffset + 16 * i, DL, MVT::i32);
Ops[5] = DAG.getTargetConstant(InstOffset + 16 * i, DL, MVT::i32);
Loads.push_back(DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_LOAD, DL, VTList,
Ops, LoadVT, MMO));
}
@ -5894,12 +5894,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1), // Src0
Op.getOperand(2), // Attrchan
Op.getOperand(3), // Attr
DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers
DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
S, // Src2 - holds two f16 values selected by high
DAG.getConstant(0, DL, MVT::i32), // $src2_modifiers
DAG.getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
Op.getOperand(4), // high
DAG.getConstant(0, DL, MVT::i1), // $clamp
DAG.getConstant(0, DL, MVT::i32) // $omod
DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
DAG.getTargetConstant(0, DL, MVT::i32) // $omod
};
return DAG.getNode(AMDGPUISD::INTERP_P1LV_F16, DL, MVT::f32, Ops);
} else {
@ -5908,10 +5908,10 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1), // Src0
Op.getOperand(2), // Attrchan
Op.getOperand(3), // Attr
DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers
DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
Op.getOperand(4), // high
DAG.getConstant(0, DL, MVT::i1), // $clamp
DAG.getConstant(0, DL, MVT::i32), // $omod
DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
DAG.getTargetConstant(0, DL, MVT::i32), // $omod
Glue
};
return DAG.getNode(AMDGPUISD::INTERP_P1LL_F16, DL, MVT::f32, Ops);
@ -5924,11 +5924,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(2), // Src0
Op.getOperand(3), // Attrchan
Op.getOperand(4), // Attr
DAG.getConstant(0, DL, MVT::i32), // $src0_modifiers
DAG.getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
Op.getOperand(1), // Src2
DAG.getConstant(0, DL, MVT::i32), // $src2_modifiers
DAG.getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
Op.getOperand(5), // high
DAG.getConstant(0, DL, MVT::i1), // $clamp
DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
Glue
};
return DAG.getNode(AMDGPUISD::INTERP_P2_F16, DL, MVT::f16, Ops);
@ -6234,8 +6234,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SDValue(), // voffset -- will be set by setBufferOffsets
SDValue(), // soffset -- will be set by setBufferOffsets
SDValue(), // offset -- will be set by setBufferOffsets
DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
};
setBufferOffsets(Op.getOperand(4), DAG, &Ops[3]);
@ -6272,7 +6272,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(4), // soffset
Offsets.second, // offset
Op.getOperand(5), // cachepolicy
DAG.getConstant(0, DL, MVT::i1), // idxen
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
return lowerIntrinsicLoad(cast<MemSDNode>(Op), IsFormat, DAG, Ops);
@ -6290,7 +6290,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(5), // soffset
Offsets.second, // offset
Op.getOperand(6), // cachepolicy
DAG.getConstant(1, DL, MVT::i1), // idxen
DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
return lowerIntrinsicLoad(cast<MemSDNode>(Op), IsFormat, DAG, Ops);
@ -6313,9 +6313,9 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(4), // voffset
Op.getOperand(5), // soffset
Op.getOperand(6), // offset
DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
DAG.getTargetConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
DAG.getTargetConstant(IdxEn, DL, MVT::i1) // idxen
};
if (LoadVT.getScalarType() == MVT::f16)
@ -6339,7 +6339,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Offsets.second, // offset
Op.getOperand(5), // format
Op.getOperand(6), // cachepolicy
DAG.getConstant(0, DL, MVT::i1), // idxen
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
if (LoadVT.getScalarType() == MVT::f16)
@ -6363,7 +6363,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Offsets.second, // offset
Op.getOperand(6), // format
Op.getOperand(7), // cachepolicy
DAG.getConstant(1, DL, MVT::i1), // idxen
DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
if (LoadVT.getScalarType() == MVT::f16)
@ -6395,8 +6395,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SDValue(), // voffset -- will be set by setBufferOffsets
SDValue(), // soffset -- will be set by setBufferOffsets
SDValue(), // offset -- will be set by setBufferOffsets
DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy
DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
};
setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
EVT VT = Op.getValueType();
@ -6464,7 +6464,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(5), // soffset
Offsets.second, // offset
Op.getOperand(6), // cachepolicy
DAG.getConstant(0, DL, MVT::i1), // idxen
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
EVT VT = Op.getValueType();
@ -6537,7 +6537,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(6), // soffset
Offsets.second, // offset
Op.getOperand(7), // cachepolicy
DAG.getConstant(1, DL, MVT::i1), // idxen
DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
EVT VT = Op.getValueType();
@ -6602,8 +6602,8 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SDValue(), // voffset -- will be set by setBufferOffsets
SDValue(), // soffset -- will be set by setBufferOffsets
SDValue(), // offset -- will be set by setBufferOffsets
DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy
DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
};
setBufferOffsets(Op.getOperand(6), DAG, &Ops[5]);
EVT VT = Op.getValueType();
@ -6624,7 +6624,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(6), // soffset
Offsets.second, // offset
Op.getOperand(7), // cachepolicy
DAG.getConstant(0, DL, MVT::i1), // idxen
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
@ -6644,7 +6644,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op.getOperand(7), // soffset
Offsets.second, // offset
Op.getOperand(8), // cachepolicy
DAG.getConstant(1, DL, MVT::i1), // idxen
DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
@ -6806,9 +6806,9 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
Op.getOperand(5), // voffset
Op.getOperand(6), // soffset
Op.getOperand(7), // offset
DAG.getConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
DAG.getConstant(IdxEn, DL, MVT::i1), // idexen
DAG.getTargetConstant(Dfmt | (Nfmt << 4), DL, MVT::i32), // format
DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idexen
};
unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
AMDGPUISD::TBUFFER_STORE_FORMAT;
@ -6833,7 +6833,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
Offsets.second, // offset
Op.getOperand(7), // format
Op.getOperand(8), // cachepolicy
DAG.getConstant(1, DL, MVT::i1), // idexen
DAG.getTargetConstant(1, DL, MVT::i1), // idexen
};
unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
AMDGPUISD::TBUFFER_STORE_FORMAT;
@ -6858,7 +6858,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
Offsets.second, // offset
Op.getOperand(6), // format
Op.getOperand(7), // cachepolicy
DAG.getConstant(0, DL, MVT::i1), // idexen
DAG.getTargetConstant(0, DL, MVT::i1), // idexen
};
unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
AMDGPUISD::TBUFFER_STORE_FORMAT;
@ -6886,8 +6886,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SDValue(), // voffset -- will be set by setBufferOffsets
SDValue(), // soffset -- will be set by setBufferOffsets
SDValue(), // offset -- will be set by setBufferOffsets
DAG.getConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
DAG.getTargetConstant(Glc | (Slc << 1), DL, MVT::i32), // cachepolicy
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
};
setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
unsigned Opc = IntrinsicID == Intrinsic::amdgcn_buffer_store ?
@ -6932,7 +6932,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
Op.getOperand(5), // soffset
Offsets.second, // offset
Op.getOperand(6), // cachepolicy
DAG.getConstant(0, DL, MVT::i1), // idxen
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
unsigned Opc =
IsFormat ? AMDGPUISD::BUFFER_STORE_FORMAT : AMDGPUISD::BUFFER_STORE;
@ -6976,7 +6976,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
Op.getOperand(6), // soffset
Offsets.second, // offset
Op.getOperand(7), // cachepolicy
DAG.getConstant(1, DL, MVT::i1), // idxen
DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
unsigned Opc = IntrinsicID == Intrinsic::amdgcn_struct_buffer_store ?
AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
@ -7005,8 +7005,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SDValue(), // voffset -- will be set by setBufferOffsets
SDValue(), // soffset -- will be set by setBufferOffsets
SDValue(), // offset -- will be set by setBufferOffsets
DAG.getConstant(Slc << 1, DL, MVT::i32), // cachepolicy
DAG.getConstant(IdxEn, DL, MVT::i1), // idxen
DAG.getTargetConstant(Slc << 1, DL, MVT::i32), // cachepolicy
DAG.getTargetConstant(IdxEn, DL, MVT::i1), // idxen
};
setBufferOffsets(Op.getOperand(5), DAG, &Ops[4]);
EVT VT = Op.getOperand(2).getValueType();
@ -7084,7 +7084,7 @@ std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
Overflow += ImmOffset;
ImmOffset = 0;
}
C1 = cast<ConstantSDNode>(DAG.getConstant(ImmOffset, DL, MVT::i32));
C1 = cast<ConstantSDNode>(DAG.getTargetConstant(ImmOffset, DL, MVT::i32));
if (Overflow) {
auto OverflowVal = DAG.getConstant(Overflow, DL, MVT::i32);
if (!N0)
@ -7098,7 +7098,7 @@ std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
if (!N0)
N0 = DAG.getConstant(0, DL, MVT::i32);
if (!C1)
C1 = cast<ConstantSDNode>(DAG.getConstant(0, DL, MVT::i32));
C1 = cast<ConstantSDNode>(DAG.getTargetConstant(0, DL, MVT::i32));
return {N0, SDValue(C1, 0)};
}
@ -7115,7 +7115,7 @@ void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
if (AMDGPU::splitMUBUFOffset(Imm, SOffset, ImmOffset, Subtarget, Align)) {
Offsets[0] = DAG.getConstant(0, DL, MVT::i32);
Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
Offsets[2] = DAG.getConstant(ImmOffset, DL, MVT::i32);
Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32);
return;
}
}
@ -7128,13 +7128,13 @@ void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
Subtarget, Align)) {
Offsets[0] = N0;
Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
Offsets[2] = DAG.getConstant(ImmOffset, DL, MVT::i32);
Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32);
return;
}
}
Offsets[0] = CombinedOffset;
Offsets[1] = DAG.getConstant(0, DL, MVT::i32);
Offsets[2] = DAG.getConstant(0, DL, MVT::i32);
Offsets[2] = DAG.getTargetConstant(0, DL, MVT::i32);
}
// Handle 8 bit and 16 bit buffer loads

View File

@ -43,8 +43,8 @@ multiclass V_INTERP_P1_F32_m : VINTRP_m <
(outs VINTRPDst:$vdst),
(ins VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan),
"v_interp_p1_f32$vdst, $vsrc, $attr$attrchan",
[(set f32:$vdst, (AMDGPUinterp_p1 f32:$vsrc, (i32 imm:$attrchan),
(i32 imm:$attr)))]
[(set f32:$vdst, (AMDGPUinterp_p1 f32:$vsrc, (i32 timm:$attrchan),
(i32 timm:$attr)))]
>;
let OtherPredicates = [has32BankLDS] in {
@ -66,8 +66,8 @@ defm V_INTERP_P2_F32 : VINTRP_m <
(outs VINTRPDst:$vdst),
(ins VGPR_32:$src0, VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan),
"v_interp_p2_f32$vdst, $vsrc, $attr$attrchan",
[(set f32:$vdst, (AMDGPUinterp_p2 f32:$src0, f32:$vsrc, (i32 imm:$attrchan),
(i32 imm:$attr)))]>;
[(set f32:$vdst, (AMDGPUinterp_p2 f32:$src0, f32:$vsrc, (i32 timm:$attrchan),
(i32 timm:$attr)))]>;
} // End DisableEncoding = "$src0", Constraints = "$src0 = $vdst"
@ -76,8 +76,8 @@ defm V_INTERP_MOV_F32 : VINTRP_m <
(outs VINTRPDst:$vdst),
(ins InterpSlot:$vsrc, Attr:$attr, AttrChan:$attrchan),
"v_interp_mov_f32$vdst, $vsrc, $attr$attrchan",
[(set f32:$vdst, (AMDGPUinterp_mov (i32 imm:$vsrc), (i32 imm:$attrchan),
(i32 imm:$attr)))]>;
[(set f32:$vdst, (AMDGPUinterp_mov (i32 imm:$vsrc), (i32 timm:$attrchan),
(i32 timm:$attr)))]>;
} // End Uses = [M0, EXEC]

View File

@ -1090,7 +1090,7 @@ def S_WAKEUP : SOPP <0x00000003, (ins), "s_wakeup"> {
let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16",
[(int_amdgcn_s_waitcnt UIMM16bit:$simm16)]>;
[(int_amdgcn_s_waitcnt timm:$simm16)]>;
def S_SETHALT : SOPP <0x0000000d, (ins i16imm:$simm16), "s_sethalt $simm16">;
def S_SETKILL : SOPP <0x0000000b, (ins i16imm:$simm16), "s_setkill $simm16">;
@ -1099,7 +1099,7 @@ def S_SETKILL : SOPP <0x0000000b, (ins i16imm:$simm16), "s_setkill $simm16">;
// maximum reported is 960 cycles, so 960 / 64 = 15 max, so is the
// maximum really 15 on VI?
def S_SLEEP : SOPP <0x0000000e, (ins i32imm:$simm16),
"s_sleep $simm16", [(int_amdgcn_s_sleep SIMM16bit:$simm16)]> {
"s_sleep $simm16", [(int_amdgcn_s_sleep timm:$simm16)]> {
let hasSideEffects = 1;
let mayLoad = 1;
let mayStore = 1;
@ -1110,10 +1110,10 @@ def S_SETPRIO : SOPP <0x0000000f, (ins i16imm:$simm16), "s_setprio $simm16">;
let Uses = [EXEC, M0] in {
// FIXME: Should this be mayLoad+mayStore?
def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16",
[(int_amdgcn_s_sendmsg (i32 imm:$simm16), M0)]>;
[(int_amdgcn_s_sendmsg (i32 timm:$simm16), M0)]>;
def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16",
[(int_amdgcn_s_sendmsghalt (i32 imm:$simm16), M0)]>;
[(int_amdgcn_s_sendmsghalt (i32 timm:$simm16), M0)]>;
} // End Uses = [EXEC, M0]
@ -1125,13 +1125,13 @@ def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> {
let simm16 = 0;
}
def S_INCPERFLEVEL : SOPP <0x00000014, (ins i32imm:$simm16), "s_incperflevel $simm16",
[(int_amdgcn_s_incperflevel SIMM16bit:$simm16)]> {
[(int_amdgcn_s_incperflevel timm:$simm16)]> {
let hasSideEffects = 1;
let mayLoad = 1;
let mayStore = 1;
}
def S_DECPERFLEVEL : SOPP <0x00000015, (ins i32imm:$simm16), "s_decperflevel $simm16",
[(int_amdgcn_s_decperflevel SIMM16bit:$simm16)]> {
[(int_amdgcn_s_decperflevel timm:$simm16)]> {
let hasSideEffects = 1;
let mayLoad = 1;
let mayStore = 1;
@ -1180,7 +1180,7 @@ let SubtargetPredicate = isGFX10Plus in {
// S_GETREG_B32 Intrinsic Pattern.
//===----------------------------------------------------------------------===//
def : GCNPat <
(int_amdgcn_s_getreg imm:$simm16),
(int_amdgcn_s_getreg timm:$simm16),
(S_GETREG_B32 (as_i16imm $simm16))
>;

View File

@ -841,16 +841,16 @@ def V_MOVRELD_B32_V16 : V_MOVRELD_B32_pseudo<VReg_512>;
let OtherPredicates = [isGFX8GFX9] in {
def : GCNPat <
(i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
imm:$bound_ctrl)),
(i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask, timm:$bank_mask,
timm:$bound_ctrl)),
(V_MOV_B32_dpp $src, $src, (as_i32imm $dpp_ctrl),
(as_i32imm $row_mask), (as_i32imm $bank_mask),
(as_i1imm $bound_ctrl))
>;
def : GCNPat <
(i32 (int_amdgcn_update_dpp i32:$old, i32:$src, imm:$dpp_ctrl, imm:$row_mask,
imm:$bank_mask, imm:$bound_ctrl)),
(i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl, timm:$row_mask,
timm:$bank_mask, timm:$bound_ctrl)),
(V_MOV_B32_dpp $old, $src, (as_i32imm $dpp_ctrl),
(as_i32imm $row_mask), (as_i32imm $bank_mask),
(as_i1imm $bound_ctrl))
@ -911,21 +911,21 @@ defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;
let OtherPredicates = [isGFX10Plus] in {
def : GCNPat <
(i32 (int_amdgcn_mov_dpp8 i32:$src, imm:$dpp8)),
(i32 (int_amdgcn_mov_dpp8 i32:$src, timm:$dpp8)),
(V_MOV_B32_dpp8_gfx10 $src, $src, (as_i32imm $dpp8), (i32 DPP8Mode.FI_0))
>;
def : GCNPat <
(i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
imm:$bound_ctrl)),
(i32 (int_amdgcn_mov_dpp i32:$src, timm:$dpp_ctrl, timm:$row_mask, timm:$bank_mask,
timm:$bound_ctrl)),
(V_MOV_B32_dpp_gfx10 $src, $src, (as_i32imm $dpp_ctrl),
(as_i32imm $row_mask), (as_i32imm $bank_mask),
(as_i1imm $bound_ctrl), (i32 0))
>;
def : GCNPat <
(i32 (int_amdgcn_update_dpp i32:$old, i32:$src, imm:$dpp_ctrl, imm:$row_mask,
imm:$bank_mask, imm:$bound_ctrl)),
(i32 (int_amdgcn_update_dpp i32:$old, i32:$src, timm:$dpp_ctrl, timm:$row_mask,
timm:$bank_mask, timm:$bound_ctrl)),
(V_MOV_B32_dpp_gfx10 $old, $src, (as_i32imm $dpp_ctrl),
(as_i32imm $row_mask), (as_i32imm $bank_mask),
(as_i1imm $bound_ctrl), (i32 0))

View File

@ -112,7 +112,7 @@ class getVOP3ClampPat<VOPProfile P, SDPatternOperator node> {
class getVOP3MAIPat<VOPProfile P, SDPatternOperator node> {
list<dag> ret = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2,
imm:$cbsz, imm:$abid, imm:$blgp))];
timm:$cbsz, timm:$abid, timm:$blgp))];
}
class VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag, bit VOP3Only = 0> :
@ -453,13 +453,13 @@ let FPDPRounding = 1 in {
def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>;
let Uses = [M0, EXEC] in {
def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>,
[(set f16:$vdst, (AMDGPUinterp_p2_f16 f32:$src0, (i32 imm:$attrchan),
(i32 imm:$attr),
(i32 imm:$src0_modifiers),
[(set f16:$vdst, (AMDGPUinterp_p2_f16 f32:$src0, (i32 timm:$attrchan),
(i32 timm:$attr),
(i32 timm:$src0_modifiers),
(f32 VRegSrc_32:$src2),
(i32 imm:$src2_modifiers),
(i1 imm:$high),
(i1 imm:$clamp)))]>;
(i32 timm:$src2_modifiers),
(i1 timm:$high),
(i1 timm:$clamp)))]>;
} // End Uses = [M0, EXEC]
} // End FPDPRounding = 1
} // End renamedInGFX9 = 1
@ -478,21 +478,21 @@ def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f1
let Uses = [M0, EXEC], FPDPRounding = 1 in {
def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32, i32, untyped]>,
[(set f32:$vdst, (AMDGPUinterp_p1ll_f16 f32:$src0, (i32 imm:$attrchan),
(i32 imm:$attr),
(i32 imm:$src0_modifiers),
(i1 imm:$high),
(i1 imm:$clamp),
(i32 imm:$omod)))]>;
[(set f32:$vdst, (AMDGPUinterp_p1ll_f16 f32:$src0, (i32 timm:$attrchan),
(i32 timm:$attr),
(i32 timm:$src0_modifiers),
(i1 timm:$high),
(i1 timm:$clamp),
(i32 timm:$omod)))]>;
def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32, i32, f16]>,
[(set f32:$vdst, (AMDGPUinterp_p1lv_f16 f32:$src0, (i32 imm:$attrchan),
(i32 imm:$attr),
(i32 imm:$src0_modifiers),
[(set f32:$vdst, (AMDGPUinterp_p1lv_f16 f32:$src0, (i32 timm:$attrchan),
(i32 timm:$attr),
(i32 timm:$src0_modifiers),
(f32 VRegSrc_32:$src2),
(i32 imm:$src2_modifiers),
(i1 imm:$high),
(i1 imm:$clamp),
(i32 imm:$omod)))]>;
(i32 timm:$src2_modifiers),
(i1 timm:$high),
(i1 timm:$clamp),
(i32 timm:$omod)))]>;
} // End Uses = [M0, EXEC], FPDPRounding = 1
} // End SubtargetPredicate = Has16BitInsts, isCommutable = 1
@ -642,11 +642,11 @@ let SubtargetPredicate = isGFX10Plus in {
} // End $vdst = $vdst_in, DisableEncoding $vdst_in
def : GCNPat<
(int_amdgcn_permlane16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, imm:$fi, imm:$bc),
(int_amdgcn_permlane16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc),
(V_PERMLANE16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in)
>;
def : GCNPat<
(int_amdgcn_permlanex16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, imm:$fi, imm:$bc),
(int_amdgcn_permlanex16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc),
(V_PERMLANEX16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in)
>;
} // End SubtargetPredicate = isGFX10Plus

View File

@ -3116,12 +3116,12 @@ ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
// Load the current TEB (thread environment block)
SDValue Ops[] = {Chain,
DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
DAG.getConstant(15, DL, MVT::i32),
DAG.getConstant(0, DL, MVT::i32),
DAG.getConstant(13, DL, MVT::i32),
DAG.getConstant(0, DL, MVT::i32),
DAG.getConstant(2, DL, MVT::i32)};
DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
DAG.getTargetConstant(15, DL, MVT::i32),
DAG.getTargetConstant(0, DL, MVT::i32),
DAG.getTargetConstant(13, DL, MVT::i32),
DAG.getTargetConstant(0, DL, MVT::i32),
DAG.getTargetConstant(2, DL, MVT::i32)};
SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
DAG.getVTList(MVT::i32, MVT::Other), Ops);
@ -8910,12 +8910,12 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N,
// Under Power Management extensions, the cycle-count is:
// mrc p15, #0, <Rt>, c9, c13, #0
SDValue Ops[] = { N->getOperand(0), // Chain
DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
DAG.getConstant(15, DL, MVT::i32),
DAG.getConstant(0, DL, MVT::i32),
DAG.getConstant(9, DL, MVT::i32),
DAG.getConstant(13, DL, MVT::i32),
DAG.getConstant(0, DL, MVT::i32)
DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
DAG.getTargetConstant(15, DL, MVT::i32),
DAG.getTargetConstant(0, DL, MVT::i32),
DAG.getTargetConstant(9, DL, MVT::i32),
DAG.getTargetConstant(13, DL, MVT::i32),
DAG.getTargetConstant(0, DL, MVT::i32)
};
SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,

View File

@ -5110,8 +5110,8 @@ def SWPB: AIswp<1, (outs GPRnopc:$Rt),
def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
[(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
imm:$CRm, imm:$opc2)]>,
[(int_arm_cdp timm:$cop, timm:$opc1, timm:$CRd, timm:$CRn,
timm:$CRm, timm:$opc2)]>,
Requires<[IsARM,PreV8]> {
bits<4> opc1;
bits<4> CRn;
@ -5134,8 +5134,8 @@ def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
[(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
imm:$CRm, imm:$opc2)]>,
[(int_arm_cdp2 timm:$cop, timm:$opc1, timm:$CRd, timm:$CRn,
timm:$CRm, timm:$opc2)]>,
Requires<[IsARM,PreV8]> {
let Inst{31-28} = 0b1111;
bits<4> opc1;
@ -5314,15 +5314,15 @@ multiclass LdSt2Cop<bit load, bit Dbit, string asm, list<dag> pattern> {
}
}
defm LDC : LdStCop <1, 0, "ldc", [(int_arm_ldc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
defm LDCL : LdStCop <1, 1, "ldcl", [(int_arm_ldcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
defm LDC2 : LdSt2Cop<1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
defm LDC2L : LdSt2Cop<1, 1, "ldc2l", [(int_arm_ldc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
defm LDC : LdStCop <1, 0, "ldc", [(int_arm_ldc timm:$cop, timm:$CRd, addrmode5:$addr)]>;
defm LDCL : LdStCop <1, 1, "ldcl", [(int_arm_ldcl timm:$cop, timm:$CRd, addrmode5:$addr)]>;
defm LDC2 : LdSt2Cop<1, 0, "ldc2", [(int_arm_ldc2 timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
defm LDC2L : LdSt2Cop<1, 1, "ldc2l", [(int_arm_ldc2l timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
defm STC : LdStCop <0, 0, "stc", [(int_arm_stc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
defm STCL : LdStCop <0, 1, "stcl", [(int_arm_stcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
defm STC2 : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
defm STC : LdStCop <0, 0, "stc", [(int_arm_stc timm:$cop, timm:$CRd, addrmode5:$addr)]>;
defm STCL : LdStCop <0, 1, "stcl", [(int_arm_stcl timm:$cop, timm:$CRd, addrmode5:$addr)]>;
defm STC2 : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
} // DecoderNamespace = "CoProc"
@ -5358,8 +5358,8 @@ def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
(outs),
(ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
imm:$CRm, imm:$opc2)]>,
[(int_arm_mcr timm:$cop, timm:$opc1, GPR:$Rt, timm:$CRn,
timm:$CRm, timm:$opc2)]>,
ComplexDeprecationPredicate<"MCR">;
def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
(MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
@ -5372,8 +5372,8 @@ def : ARMInstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm",
(MRC GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, 0, pred:$p)>;
def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
(MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
def : ARMPat<(int_arm_mrc timm:$cop, timm:$opc1, timm:$CRn, timm:$CRm, timm:$opc2),
(MRC p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2)>;
class MovRCopro2<string opc, bit direction, dag oops, dag iops,
list<dag> pattern>
@ -5404,8 +5404,8 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
(outs),
(ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
imm:$CRm, imm:$opc2)]>,
[(int_arm_mcr2 timm:$cop, timm:$opc1, GPR:$Rt, timm:$CRn,
timm:$CRm, timm:$opc2)]>,
Requires<[IsARM,PreV8]>;
def : ARMInstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm",
(MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
@ -5419,9 +5419,9 @@ def : ARMInstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm",
(MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, 0)>;
def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn,
imm:$CRm, imm:$opc2),
(MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
def : ARMV5TPat<(int_arm_mrc2 timm:$cop, timm:$opc1, timm:$CRn,
timm:$CRm, timm:$opc2),
(MRC2 p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2)>;
class MovRRCopro<string opc, bit direction, dag oops, dag iops, list<dag>
pattern = []>
@ -5447,8 +5447,8 @@ class MovRRCopro<string opc, bit direction, dag oops, dag iops, list<dag>
def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */,
(outs), (ins p_imm:$cop, imm0_15:$opc1, GPRnopc:$Rt,
GPRnopc:$Rt2, c_imm:$CRm),
[(int_arm_mcrr imm:$cop, imm:$opc1, GPRnopc:$Rt,
GPRnopc:$Rt2, imm:$CRm)]>;
[(int_arm_mcrr timm:$cop, timm:$opc1, GPRnopc:$Rt,
GPRnopc:$Rt2, timm:$CRm)]>;
def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */,
(outs GPRnopc:$Rt, GPRnopc:$Rt2),
(ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRm), []>;
@ -5480,8 +5480,8 @@ class MovRRCopro2<string opc, bit direction, dag oops, dag iops,
def MCRR2 : MovRRCopro2<"mcrr2", 0 /* from ARM core register to coprocessor */,
(outs), (ins p_imm:$cop, imm0_15:$opc1, GPRnopc:$Rt,
GPRnopc:$Rt2, c_imm:$CRm),
[(int_arm_mcrr2 imm:$cop, imm:$opc1, GPRnopc:$Rt,
GPRnopc:$Rt2, imm:$CRm)]>;
[(int_arm_mcrr2 timm:$cop, timm:$opc1, GPRnopc:$Rt,
GPRnopc:$Rt2, timm:$CRm)]>;
def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */,
(outs GPRnopc:$Rt, GPRnopc:$Rt2),
@ -6159,7 +6159,7 @@ def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>,
let mayLoad = 1, mayStore =1, hasSideEffects = 1 in
def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn),
NoItinerary,
[(set GPR:$Rd, (int_arm_space imm:$size, GPR:$Rn))]>;
[(set GPR:$Rd, (int_arm_space timm:$size, GPR:$Rn))]>;
//===----------------------------------
// Atomic cmpxchg for -O0

View File

@ -4175,15 +4175,15 @@ multiclass t2LdStCop<bits<4> op31_28, bit load, bit Dbit, string asm, list<dag>
}
let DecoderNamespace = "Thumb2CoProc" in {
defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc", [(int_arm_ldc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl", [(int_arm_ldcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l", [(int_arm_ldc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc", [(int_arm_ldc timm:$cop, timm:$CRd, addrmode5:$addr)]>;
defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl", [(int_arm_ldcl timm:$cop, timm:$CRd, addrmode5:$addr)]>;
defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2", [(int_arm_ldc2 timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l", [(int_arm_ldc2l timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
defm t2STC : t2LdStCop<0b1110, 0, 0, "stc", [(int_arm_stc imm:$cop, imm:$CRd, addrmode5:$addr)]>;
defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl", [(int_arm_stcl imm:$cop, imm:$CRd, addrmode5:$addr)]>;
defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
defm t2STC : t2LdStCop<0b1110, 0, 0, "stc", [(int_arm_stc timm:$cop, timm:$CRd, addrmode5:$addr)]>;
defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl", [(int_arm_stcl timm:$cop, timm:$CRd, addrmode5:$addr)]>;
defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2", [(int_arm_stc2 timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l", [(int_arm_stc2l timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
}
@ -4368,8 +4368,8 @@ def t2MCR : t2MovRCopro<0b1110, "mcr", 0,
(outs),
(ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
imm:$CRm, imm:$opc2)]>,
[(int_arm_mcr timm:$cop, timm:$opc1, GPR:$Rt, timm:$CRn,
timm:$CRm, timm:$opc2)]>,
ComplexDeprecationPredicate<"MCR">;
def : t2InstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
(t2MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
@ -4377,8 +4377,8 @@ def : t2InstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm",
def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0,
(outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
imm:$CRm, imm:$opc2)]> {
[(int_arm_mcr2 timm:$cop, timm:$opc1, GPR:$Rt, timm:$CRn,
timm:$CRm, timm:$opc2)]> {
let Predicates = [IsThumb2, PreV8];
}
def : t2InstAlias<"mcr2${p} $cop, $opc1, $Rt, $CRn, $CRm",
@ -4402,24 +4402,24 @@ def : t2InstAlias<"mrc2${p} $cop, $opc1, $Rt, $CRn, $CRm",
(t2MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, 0, pred:$p)>;
def : T2v6Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
(t2MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
def : T2v6Pat<(int_arm_mrc timm:$cop, timm:$opc1, timm:$CRn, timm:$CRm, timm:$opc2),
(t2MRC p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2)>;
def : T2v6Pat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
(t2MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
def : T2v6Pat<(int_arm_mrc2 timm:$cop, timm:$opc1, timm:$CRn, timm:$CRm, timm:$opc2),
(t2MRC2 p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2)>;
/* from ARM core register to coprocessor */
def t2MCRR : t2MovRRCopro<0b1110, "mcrr", 0, (outs),
(ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2,
c_imm:$CRm),
[(int_arm_mcrr imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2,
imm:$CRm)]>;
[(int_arm_mcrr timm:$cop, timm:$opc1, GPR:$Rt, GPR:$Rt2,
timm:$CRm)]>;
def t2MCRR2 : t2MovRRCopro<0b1111, "mcrr2", 0, (outs),
(ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2,
c_imm:$CRm),
[(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt,
GPR:$Rt2, imm:$CRm)]> {
[(int_arm_mcrr2 timm:$cop, timm:$opc1, GPR:$Rt,
GPR:$Rt2, timm:$CRm)]> {
let Predicates = [IsThumb2, PreV8];
}
@ -4439,8 +4439,8 @@ def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1, (outs GPR:$Rt, GPR:$Rt2),
def t2CDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
"cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
[(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
imm:$CRm, imm:$opc2)]> {
[(int_arm_cdp timm:$cop, timm:$opc1, timm:$CRd, timm:$CRn,
timm:$CRm, timm:$opc2)]> {
let Inst{27-24} = 0b1110;
bits<4> opc1;
@ -4465,8 +4465,8 @@ def t2CDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1,
c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
"cdp2", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
[(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
imm:$CRm, imm:$opc2)]> {
[(int_arm_cdp2 timm:$cop, timm:$opc1, timm:$CRd, timm:$CRn,
timm:$CRm, timm:$opc2)]> {
let Inst{27-24} = 0b1110;
bits<4> opc1;

File diff suppressed because it is too large Load Diff

View File

@ -8,120 +8,125 @@
// Automatically generated file, please consult code owner before editing.
//===----------------------------------------------------------------------===//
multiclass ImmOpPred<code pred, ValueType vt = i32> {
def "" : PatLeaf<(vt imm), pred>;
def _timm : PatLeaf<(vt timm), pred>;
}
def s4_0ImmOperand : AsmOperandClass { let Name = "s4_0Imm"; let RenderMethod = "addSignedImmOperands"; }
def s4_0Imm : Operand<i32> { let ParserMatchClass = s4_0ImmOperand; let DecoderMethod = "s4_0ImmDecoder"; }
def s4_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 0>(N->getSExtValue());}]>;
defm s4_0ImmPred : ImmOpPred<[{ return isShiftedInt<4, 0>(N->getSExtValue());}]>;
def s29_3ImmOperand : AsmOperandClass { let Name = "s29_3Imm"; let RenderMethod = "addSignedImmOperands"; }
def s29_3Imm : Operand<i32> { let ParserMatchClass = s29_3ImmOperand; let DecoderMethod = "s29_3ImmDecoder"; }
def s29_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 3>(N->getSExtValue());}]>;
defm s29_3ImmPred : ImmOpPred<[{ return isShiftedInt<32, 3>(N->getSExtValue());}]>;
def u6_0ImmOperand : AsmOperandClass { let Name = "u6_0Imm"; let RenderMethod = "addImmOperands"; }
def u6_0Imm : Operand<i32> { let ParserMatchClass = u6_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
def u6_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<6, 0>(N->getSExtValue());}]>;
defm u6_0ImmPred : ImmOpPred<[{ return isShiftedUInt<6, 0>(N->getSExtValue());}]>;
def a30_2ImmOperand : AsmOperandClass { let Name = "a30_2Imm"; let RenderMethod = "addSignedImmOperands"; }
def a30_2Imm : Operand<i32> { let ParserMatchClass = a30_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
def a30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
defm a30_2ImmPred : ImmOpPred<[{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
def u29_3ImmOperand : AsmOperandClass { let Name = "u29_3Imm"; let RenderMethod = "addImmOperands"; }
def u29_3Imm : Operand<i32> { let ParserMatchClass = u29_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
def u29_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 3>(N->getSExtValue());}]>;
defm u29_3ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 3>(N->getSExtValue());}]>;
def s8_0ImmOperand : AsmOperandClass { let Name = "s8_0Imm"; let RenderMethod = "addSignedImmOperands"; }
def s8_0Imm : Operand<i32> { let ParserMatchClass = s8_0ImmOperand; let DecoderMethod = "s8_0ImmDecoder"; }
def s8_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<8, 0>(N->getSExtValue());}]>;
defm s8_0ImmPred : ImmOpPred<[{ return isShiftedInt<8, 0>(N->getSExtValue());}]>;
def u32_0ImmOperand : AsmOperandClass { let Name = "u32_0Imm"; let RenderMethod = "addImmOperands"; }
def u32_0Imm : Operand<i32> { let ParserMatchClass = u32_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
def u32_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 0>(N->getSExtValue());}]>;
defm u32_0ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 0>(N->getSExtValue());}]>;
def u4_2ImmOperand : AsmOperandClass { let Name = "u4_2Imm"; let RenderMethod = "addImmOperands"; }
def u4_2Imm : Operand<i32> { let ParserMatchClass = u4_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
def u4_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<4, 2>(N->getSExtValue());}]>;
defm u4_2ImmPred : ImmOpPred<[{ return isShiftedUInt<4, 2>(N->getSExtValue());}]>;
def u3_0ImmOperand : AsmOperandClass { let Name = "u3_0Imm"; let RenderMethod = "addImmOperands"; }
def u3_0Imm : Operand<i32> { let ParserMatchClass = u3_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
def u3_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<3, 0>(N->getSExtValue());}]>;
defm u3_0ImmPred : ImmOpPred<[{ return isShiftedUInt<3, 0>(N->getSExtValue());}]>;
def b15_2ImmOperand : AsmOperandClass { let Name = "b15_2Imm"; let RenderMethod = "addSignedImmOperands"; }
def b15_2Imm : Operand<OtherVT> { let ParserMatchClass = b15_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
def b15_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<15, 2>(N->getSExtValue());}]>;
defm b15_2ImmPred : ImmOpPred<[{ return isShiftedInt<15, 2>(N->getSExtValue());}]>;
def u11_3ImmOperand : AsmOperandClass { let Name = "u11_3Imm"; let RenderMethod = "addImmOperands"; }
def u11_3Imm : Operand<i32> { let ParserMatchClass = u11_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
def u11_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<11, 3>(N->getSExtValue());}]>;
defm u11_3ImmPred : ImmOpPred<[{ return isShiftedUInt<11, 3>(N->getSExtValue());}]>;
def s4_3ImmOperand : AsmOperandClass { let Name = "s4_3Imm"; let RenderMethod = "addSignedImmOperands"; }
def s4_3Imm : Operand<i32> { let ParserMatchClass = s4_3ImmOperand; let DecoderMethod = "s4_3ImmDecoder"; }
def s4_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 3>(N->getSExtValue());}]>;
defm s4_3ImmPred : ImmOpPred<[{ return isShiftedInt<4, 3>(N->getSExtValue());}]>;
def m32_0ImmOperand : AsmOperandClass { let Name = "m32_0Imm"; let RenderMethod = "addImmOperands"; }
def m32_0Imm : Operand<i32> { let ParserMatchClass = m32_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
def m32_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 0>(N->getSExtValue());}]>;
defm m32_0ImmPred : ImmOpPred<[{ return isShiftedInt<32, 0>(N->getSExtValue());}]>;
def u3_1ImmOperand : AsmOperandClass { let Name = "u3_1Imm"; let RenderMethod = "addImmOperands"; }
def u3_1Imm : Operand<i32> { let ParserMatchClass = u3_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
def u3_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<3, 1>(N->getSExtValue());}]>;
defm u3_1ImmPred : ImmOpPred<[{ return isShiftedUInt<3, 1>(N->getSExtValue());}]>;
def u1_0ImmOperand : AsmOperandClass { let Name = "u1_0Imm"; let RenderMethod = "addImmOperands"; }
def u1_0Imm : Operand<i32> { let ParserMatchClass = u1_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
def u1_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<1, 0>(N->getSExtValue());}]>;
defm u1_0ImmPred : ImmOpPred<[{ return isShiftedUInt<1, 0>(N->getSExtValue());}]>;
def s31_1ImmOperand : AsmOperandClass { let Name = "s31_1Imm"; let RenderMethod = "addSignedImmOperands"; }
def s31_1Imm : Operand<i32> { let ParserMatchClass = s31_1ImmOperand; let DecoderMethod = "s31_1ImmDecoder"; }
def s31_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 1>(N->getSExtValue());}]>;
defm s31_1ImmPred : ImmOpPred<[{ return isShiftedInt<32, 1>(N->getSExtValue());}]>;
def s3_0ImmOperand : AsmOperandClass { let Name = "s3_0Imm"; let RenderMethod = "addSignedImmOperands"; }
def s3_0Imm : Operand<i32> { let ParserMatchClass = s3_0ImmOperand; let DecoderMethod = "s3_0ImmDecoder"; }
def s3_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<3, 0>(N->getSExtValue());}]>;
defm s3_0ImmPred : ImmOpPred<[{ return isShiftedInt<3, 0>(N->getSExtValue());}]>;
def s30_2ImmOperand : AsmOperandClass { let Name = "s30_2Imm"; let RenderMethod = "addSignedImmOperands"; }
def s30_2Imm : Operand<i32> { let ParserMatchClass = s30_2ImmOperand; let DecoderMethod = "s30_2ImmDecoder"; }
def s30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
defm s30_2ImmPred : ImmOpPred<[{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
def u4_0ImmOperand : AsmOperandClass { let Name = "u4_0Imm"; let RenderMethod = "addImmOperands"; }
def u4_0Imm : Operand<i32> { let ParserMatchClass = u4_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
def u4_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<4, 0>(N->getSExtValue());}]>;
defm u4_0ImmPred : ImmOpPred<[{ return isShiftedUInt<4, 0>(N->getSExtValue());}]>;
def s6_0ImmOperand : AsmOperandClass { let Name = "s6_0Imm"; let RenderMethod = "addSignedImmOperands"; }
def s6_0Imm : Operand<i32> { let ParserMatchClass = s6_0ImmOperand; let DecoderMethod = "s6_0ImmDecoder"; }
def s6_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<6, 0>(N->getSExtValue());}]>;
defm s6_0ImmPred : ImmOpPred<[{ return isShiftedInt<6, 0>(N->getSExtValue());}]>;
def u5_3ImmOperand : AsmOperandClass { let Name = "u5_3Imm"; let RenderMethod = "addImmOperands"; }
def u5_3Imm : Operand<i32> { let ParserMatchClass = u5_3ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
def u5_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<5, 3>(N->getSExtValue());}]>;
defm u5_3ImmPred : ImmOpPred<[{ return isShiftedUInt<5, 3>(N->getSExtValue());}]>;
def s32_0ImmOperand : AsmOperandClass { let Name = "s32_0Imm"; let RenderMethod = "addSignedImmOperands"; }
def s32_0Imm : Operand<i32> { let ParserMatchClass = s32_0ImmOperand; let DecoderMethod = "s32_0ImmDecoder"; }
def s32_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 0>(N->getSExtValue());}]>;
defm s32_0ImmPred : ImmOpPred<[{ return isShiftedInt<32, 0>(N->getSExtValue());}]>;
def s6_3ImmOperand : AsmOperandClass { let Name = "s6_3Imm"; let RenderMethod = "addSignedImmOperands"; }
def s6_3Imm : Operand<i32> { let ParserMatchClass = s6_3ImmOperand; let DecoderMethod = "s6_3ImmDecoder"; }
def s6_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<6, 3>(N->getSExtValue());}]>;
defm s6_3ImmPred : ImmOpPred<[{ return isShiftedInt<6, 3>(N->getSExtValue());}]>;
def u10_0ImmOperand : AsmOperandClass { let Name = "u10_0Imm"; let RenderMethod = "addImmOperands"; }
def u10_0Imm : Operand<i32> { let ParserMatchClass = u10_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
def u10_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<10, 0>(N->getSExtValue());}]>;
defm u10_0ImmPred : ImmOpPred<[{ return isShiftedUInt<10, 0>(N->getSExtValue());}]>;
def u31_1ImmOperand : AsmOperandClass { let Name = "u31_1Imm"; let RenderMethod = "addImmOperands"; }
def u31_1Imm : Operand<i32> { let ParserMatchClass = u31_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
def u31_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 1>(N->getSExtValue());}]>;
defm u31_1ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 1>(N->getSExtValue());}]>;
def s4_1ImmOperand : AsmOperandClass { let Name = "s4_1Imm"; let RenderMethod = "addSignedImmOperands"; }
def s4_1Imm : Operand<i32> { let ParserMatchClass = s4_1ImmOperand; let DecoderMethod = "s4_1ImmDecoder"; }
def s4_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 1>(N->getSExtValue());}]>;
defm s4_1ImmPred : ImmOpPred<[{ return isShiftedInt<4, 1>(N->getSExtValue());}]>;
def u16_0ImmOperand : AsmOperandClass { let Name = "u16_0Imm"; let RenderMethod = "addImmOperands"; }
def u16_0Imm : Operand<i32> { let ParserMatchClass = u16_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
def u16_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<16, 0>(N->getSExtValue());}]>;
defm u16_0ImmPred : ImmOpPred<[{ return isShiftedUInt<16, 0>(N->getSExtValue());}]>;
def u6_1ImmOperand : AsmOperandClass { let Name = "u6_1Imm"; let RenderMethod = "addImmOperands"; }
def u6_1Imm : Operand<i32> { let ParserMatchClass = u6_1ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
def u6_1ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<6, 1>(N->getSExtValue());}]>;
defm u6_1ImmPred : ImmOpPred<[{ return isShiftedUInt<6, 1>(N->getSExtValue());}]>;
def u5_2ImmOperand : AsmOperandClass { let Name = "u5_2Imm"; let RenderMethod = "addImmOperands"; }
def u5_2Imm : Operand<i32> { let ParserMatchClass = u5_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
def u5_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<5, 2>(N->getSExtValue());}]>;
defm u5_2ImmPred : ImmOpPred<[{ return isShiftedUInt<5, 2>(N->getSExtValue());}]>;
def u26_6ImmOperand : AsmOperandClass { let Name = "u26_6Imm"; let RenderMethod = "addImmOperands"; }
def u26_6Imm : Operand<i32> { let ParserMatchClass = u26_6ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
def u26_6ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<26, 6>(N->getSExtValue());}]>;
defm u26_6ImmPred : ImmOpPred<[{ return isShiftedUInt<26, 6>(N->getSExtValue());}]>;
def u6_2ImmOperand : AsmOperandClass { let Name = "u6_2Imm"; let RenderMethod = "addImmOperands"; }
def u6_2Imm : Operand<i32> { let ParserMatchClass = u6_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
def u6_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<6, 2>(N->getSExtValue());}]>;
defm u6_2ImmPred : ImmOpPred<[{ return isShiftedUInt<6, 2>(N->getSExtValue());}]>;
def u7_0ImmOperand : AsmOperandClass { let Name = "u7_0Imm"; let RenderMethod = "addImmOperands"; }
def u7_0Imm : Operand<i32> { let ParserMatchClass = u7_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
def u7_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<7, 0>(N->getSExtValue());}]>;
defm u7_0ImmPred : ImmOpPred<[{ return isShiftedUInt<7, 0>(N->getSExtValue());}]>;
def b13_2ImmOperand : AsmOperandClass { let Name = "b13_2Imm"; let RenderMethod = "addSignedImmOperands"; }
def b13_2Imm : Operand<OtherVT> { let ParserMatchClass = b13_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
def b13_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<13, 2>(N->getSExtValue());}]>;
defm b13_2ImmPred : ImmOpPred<[{ return isShiftedInt<13, 2>(N->getSExtValue());}]>;
def u5_0ImmOperand : AsmOperandClass { let Name = "u5_0Imm"; let RenderMethod = "addImmOperands"; }
def u5_0Imm : Operand<i32> { let ParserMatchClass = u5_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
def u5_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<5, 0>(N->getSExtValue());}]>;
defm u5_0ImmPred : ImmOpPred<[{ return isShiftedUInt<5, 0>(N->getSExtValue());}]>;
def u2_0ImmOperand : AsmOperandClass { let Name = "u2_0Imm"; let RenderMethod = "addImmOperands"; }
def u2_0Imm : Operand<i32> { let ParserMatchClass = u2_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
def u2_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<2, 0>(N->getSExtValue());}]>;
defm u2_0ImmPred : ImmOpPred<[{ return isShiftedUInt<2, 0>(N->getSExtValue());}]>;
def s4_2ImmOperand : AsmOperandClass { let Name = "s4_2Imm"; let RenderMethod = "addSignedImmOperands"; }
def s4_2Imm : Operand<i32> { let ParserMatchClass = s4_2ImmOperand; let DecoderMethod = "s4_2ImmDecoder"; }
def s4_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4, 2>(N->getSExtValue());}]>;
defm s4_2ImmPred : ImmOpPred<[{ return isShiftedInt<4, 2>(N->getSExtValue());}]>;
def b30_2ImmOperand : AsmOperandClass { let Name = "b30_2Imm"; let RenderMethod = "addSignedImmOperands"; }
def b30_2Imm : Operand<OtherVT> { let ParserMatchClass = b30_2ImmOperand; let DecoderMethod = "brtargetDecoder"; let PrintMethod = "printBrtarget"; }
def b30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
defm b30_2ImmPred : ImmOpPred<[{ return isShiftedInt<32, 2>(N->getSExtValue());}]>;
def u8_0ImmOperand : AsmOperandClass { let Name = "u8_0Imm"; let RenderMethod = "addImmOperands"; }
def u8_0Imm : Operand<i32> { let ParserMatchClass = u8_0ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
def u8_0ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<8, 0>(N->getSExtValue());}]>;
defm u8_0ImmPred : ImmOpPred<[{ return isShiftedUInt<8, 0>(N->getSExtValue());}]>;
def u30_2ImmOperand : AsmOperandClass { let Name = "u30_2Imm"; let RenderMethod = "addImmOperands"; }
def u30_2Imm : Operand<i32> { let ParserMatchClass = u30_2ImmOperand; let DecoderMethod = "unsignedImmDecoder"; }
def u30_2ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<32, 2>(N->getSExtValue());}]>;
defm u30_2ImmPred : ImmOpPred<[{ return isShiftedUInt<32, 2>(N->getSExtValue());}]>;

View File

@ -22,14 +22,14 @@ class T_RP_pat <InstHexagon MI, Intrinsic IntID>
def: Pat<(int_hexagon_A2_add IntRegs:$Rs, IntRegs:$Rt),
(A2_add IntRegs:$Rs, IntRegs:$Rt)>;
def: Pat<(int_hexagon_A2_addi IntRegs:$Rs, imm:$s16),
def: Pat<(int_hexagon_A2_addi IntRegs:$Rs, timm:$s16),
(A2_addi IntRegs:$Rs, imm:$s16)>;
def: Pat<(int_hexagon_A2_addp DoubleRegs:$Rs, DoubleRegs:$Rt),
(A2_addp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
def: Pat<(int_hexagon_A2_sub IntRegs:$Rs, IntRegs:$Rt),
(A2_sub IntRegs:$Rs, IntRegs:$Rt)>;
def: Pat<(int_hexagon_A2_subri imm:$s10, IntRegs:$Rs),
def: Pat<(int_hexagon_A2_subri timm:$s10, IntRegs:$Rs),
(A2_subri imm:$s10, IntRegs:$Rs)>;
def: Pat<(int_hexagon_A2_subp DoubleRegs:$Rs, DoubleRegs:$Rt),
(A2_subp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
@ -45,26 +45,26 @@ def: Pat<(int_hexagon_M2_dpmpyss_s0 IntRegs:$Rs, IntRegs:$Rt),
def: Pat<(int_hexagon_M2_dpmpyuu_s0 IntRegs:$Rs, IntRegs:$Rt),
(M2_dpmpyuu_s0 IntRegs:$Rs, IntRegs:$Rt)>;
def: Pat<(int_hexagon_S2_asl_i_r IntRegs:$Rs, imm:$u5),
def: Pat<(int_hexagon_S2_asl_i_r IntRegs:$Rs, timm:$u5),
(S2_asl_i_r IntRegs:$Rs, imm:$u5)>;
def: Pat<(int_hexagon_S2_lsr_i_r IntRegs:$Rs, imm:$u5),
def: Pat<(int_hexagon_S2_lsr_i_r IntRegs:$Rs, timm:$u5),
(S2_lsr_i_r IntRegs:$Rs, imm:$u5)>;
def: Pat<(int_hexagon_S2_asr_i_r IntRegs:$Rs, imm:$u5),
def: Pat<(int_hexagon_S2_asr_i_r IntRegs:$Rs, timm:$u5),
(S2_asr_i_r IntRegs:$Rs, imm:$u5)>;
def: Pat<(int_hexagon_S2_asl_i_p DoubleRegs:$Rs, imm:$u6),
def: Pat<(int_hexagon_S2_asl_i_p DoubleRegs:$Rs, timm:$u6),
(S2_asl_i_p DoubleRegs:$Rs, imm:$u6)>;
def: Pat<(int_hexagon_S2_lsr_i_p DoubleRegs:$Rs, imm:$u6),
def: Pat<(int_hexagon_S2_lsr_i_p DoubleRegs:$Rs, timm:$u6),
(S2_lsr_i_p DoubleRegs:$Rs, imm:$u6)>;
def: Pat<(int_hexagon_S2_asr_i_p DoubleRegs:$Rs, imm:$u6),
def: Pat<(int_hexagon_S2_asr_i_p DoubleRegs:$Rs, timm:$u6),
(S2_asr_i_p DoubleRegs:$Rs, imm:$u6)>;
def: Pat<(int_hexagon_A2_and IntRegs:$Rs, IntRegs:$Rt),
(A2_and IntRegs:$Rs, IntRegs:$Rt)>;
def: Pat<(int_hexagon_A2_andir IntRegs:$Rs, imm:$s10),
def: Pat<(int_hexagon_A2_andir IntRegs:$Rs, timm:$s10),
(A2_andir IntRegs:$Rs, imm:$s10)>;
def: Pat<(int_hexagon_A2_or IntRegs:$Rs, IntRegs:$Rt),
(A2_or IntRegs:$Rs, IntRegs:$Rt)>;
def: Pat<(int_hexagon_A2_orir IntRegs:$Rs, imm:$s10),
def: Pat<(int_hexagon_A2_orir IntRegs:$Rs, timm:$s10),
(A2_orir IntRegs:$Rs, imm:$s10)>;
def: Pat<(int_hexagon_A2_xor IntRegs:$Rs, IntRegs:$Rt),
(A2_xor IntRegs:$Rs, IntRegs:$Rt)>;
@ -99,13 +99,13 @@ def : Pat <(int_hexagon_S5_asrhub_rnd_sat_goodsyntax I64:$Rs, (i32 0)),
(S2_vsathub I64:$Rs)>;
}
def : Pat <(int_hexagon_S2_asr_i_r_rnd_goodsyntax I32:$Rs, u5_0ImmPred:$imm),
def : Pat <(int_hexagon_S2_asr_i_r_rnd_goodsyntax I32:$Rs, u5_0ImmPred_timm:$imm),
(S2_asr_i_r_rnd I32:$Rs, (UDEC1 u5_0ImmPred:$imm))>;
def : Pat <(int_hexagon_S2_asr_i_p_rnd_goodsyntax I64:$Rs, u6_0ImmPred:$imm),
def : Pat <(int_hexagon_S2_asr_i_p_rnd_goodsyntax I64:$Rs, u6_0ImmPred_timm:$imm),
(S2_asr_i_p_rnd I64:$Rs, (UDEC1 u6_0ImmPred:$imm))>;
def : Pat <(int_hexagon_S5_vasrhrnd_goodsyntax I64:$Rs, u4_0ImmPred:$imm),
def : Pat <(int_hexagon_S5_vasrhrnd_goodsyntax I64:$Rs, u4_0ImmPred_timm:$imm),
(S5_vasrhrnd I64:$Rs, (UDEC1 u4_0ImmPred:$imm))>;
def : Pat <(int_hexagon_S5_asrhub_rnd_sat_goodsyntax I64:$Rs, u4_0ImmPred:$imm),
def : Pat <(int_hexagon_S5_asrhub_rnd_sat_goodsyntax I64:$Rs, u4_0ImmPred_timm:$imm),
(S5_asrhub_rnd_sat I64:$Rs, (UDEC1 u4_0ImmPred:$imm))>;
def ImmExt64: SDNodeXForm<imm, [{
@ -121,13 +121,13 @@ def ImmExt64: SDNodeXForm<imm, [{
// To connect the builtin with the instruction, the builtin's operand
// needs to be extended to the right type.
def : Pat<(int_hexagon_A2_tfrpi imm:$Is),
def : Pat<(int_hexagon_A2_tfrpi timm:$Is),
(A2_tfrpi (ImmExt64 $Is))>;
def : Pat <(int_hexagon_C2_cmpgei I32:$src1, s32_0ImmPred:$src2),
def : Pat <(int_hexagon_C2_cmpgei I32:$src1, s32_0ImmPred_timm:$src2),
(C2_tfrpr (C2_cmpgti I32:$src1, (SDEC1 s32_0ImmPred:$src2)))>;
def : Pat <(int_hexagon_C2_cmpgeui I32:$src1, u32_0ImmPred:$src2),
def : Pat <(int_hexagon_C2_cmpgeui I32:$src1, u32_0ImmPred_timm:$src2),
(C2_tfrpr (C2_cmpgtui I32:$src1, (UDEC1 u32_0ImmPred:$src2)))>;
def : Pat <(int_hexagon_C2_cmpgeui I32:$src, 0),
@ -142,7 +142,7 @@ def : Pat <(int_hexagon_C2_cmpltu I32:$src1, I32:$src2),
//===----------------------------------------------------------------------===//
class S2op_tableidx_pat <Intrinsic IntID, InstHexagon OutputInst,
SDNodeXForm XformImm>
: Pat <(IntID I32:$src1, I32:$src2, u4_0ImmPred:$src3, u5_0ImmPred:$src4),
: Pat <(IntID I32:$src1, I32:$src2, u4_0ImmPred_timm:$src3, u5_0ImmPred_timm:$src4),
(OutputInst I32:$src1, I32:$src2, u4_0ImmPred:$src3,
(XformImm u5_0ImmPred:$src4))>;
@ -197,11 +197,11 @@ class T_stc_pat <InstHexagon MI, Intrinsic IntID, PatLeaf Imm, PatLeaf Val>
: Pat<(IntID I32:$Rs, Val:$Rt, I32:$Ru, Imm:$s),
(MI I32:$Rs, Imm:$s, I32:$Ru, Val:$Rt)>;
def: T_stc_pat<S2_storerb_pci, int_hexagon_circ_stb, s4_0ImmPred, I32>;
def: T_stc_pat<S2_storerh_pci, int_hexagon_circ_sth, s4_1ImmPred, I32>;
def: T_stc_pat<S2_storeri_pci, int_hexagon_circ_stw, s4_2ImmPred, I32>;
def: T_stc_pat<S2_storerd_pci, int_hexagon_circ_std, s4_3ImmPred, I64>;
def: T_stc_pat<S2_storerf_pci, int_hexagon_circ_sthhi, s4_1ImmPred, I32>;
def: T_stc_pat<S2_storerb_pci, int_hexagon_circ_stb, s4_0ImmPred_timm, I32>;
def: T_stc_pat<S2_storerh_pci, int_hexagon_circ_sth, s4_1ImmPred_timm, I32>;
def: T_stc_pat<S2_storeri_pci, int_hexagon_circ_stw, s4_2ImmPred_timm, I32>;
def: T_stc_pat<S2_storerd_pci, int_hexagon_circ_std, s4_3ImmPred_timm, I64>;
def: T_stc_pat<S2_storerf_pci, int_hexagon_circ_sthhi, s4_1ImmPred_timm, I32>;
multiclass MaskedStore <InstHexagon MI, Intrinsic IntID> {
def : Pat<(IntID HvxQR:$src1, IntRegs:$src2, HvxVR:$src3),

View File

@ -360,7 +360,7 @@ class RDDSP_MM_DESC {
dag OutOperandList = (outs GPR32Opnd:$rt);
dag InOperandList = (ins uimm7:$mask);
string AsmString = !strconcat("rddsp", "\t$rt, $mask");
list<dag> Pattern = [(set GPR32Opnd:$rt, (int_mips_rddsp immZExt7:$mask))];
list<dag> Pattern = [(set GPR32Opnd:$rt, (int_mips_rddsp timmZExt7:$mask))];
InstrItinClass Itinerary = NoItinerary;
}
@ -383,7 +383,7 @@ class WRDSP_MM_DESC {
dag OutOperandList = (outs);
dag InOperandList = (ins GPR32Opnd:$rt, uimm7:$mask);
string AsmString = !strconcat("wrdsp", "\t$rt, $mask");
list<dag> Pattern = [(int_mips_wrdsp GPR32Opnd:$rt, immZExt7:$mask)];
list<dag> Pattern = [(int_mips_wrdsp GPR32Opnd:$rt, timmZExt7:$mask)];
InstrItinClass Itinerary = NoItinerary;
bit isMoveReg = 1;
}

View File

@ -16,6 +16,7 @@
// shamt must fit in 6 bits.
def immZExt6 : ImmLeaf<i32, [{return Imm == (Imm & 0x3f);}]>;
def timmZExt6 : TImmLeaf<i32, [{return Imm == (Imm & 0x3f);}]>;
// Node immediate fits as 10-bit sign extended on target immediate.
// e.g. seqi, snei

View File

@ -12,12 +12,19 @@
// ImmLeaf
def immZExt1 : ImmLeaf<i32, [{return isUInt<1>(Imm);}]>;
def timmZExt1 : ImmLeaf<i32, [{return isUInt<1>(Imm);}], NOOP_SDNodeXForm, timm>;
def immZExt2 : ImmLeaf<i32, [{return isUInt<2>(Imm);}]>;
def timmZExt2 : ImmLeaf<i32, [{return isUInt<2>(Imm);}], NOOP_SDNodeXForm, timm>;
def immZExt3 : ImmLeaf<i32, [{return isUInt<3>(Imm);}]>;
def timmZExt3 : ImmLeaf<i32, [{return isUInt<3>(Imm);}], NOOP_SDNodeXForm, timm>;
def immZExt4 : ImmLeaf<i32, [{return isUInt<4>(Imm);}]>;
def timmZExt4 : ImmLeaf<i32, [{return isUInt<4>(Imm);}], NOOP_SDNodeXForm, timm>;
def immZExt8 : ImmLeaf<i32, [{return isUInt<8>(Imm);}]>;
def timmZExt8 : ImmLeaf<i32, [{return isUInt<8>(Imm);}], NOOP_SDNodeXForm, timm>;
def immZExt10 : ImmLeaf<i32, [{return isUInt<10>(Imm);}]>;
def timmZExt10 : ImmLeaf<i32, [{return isUInt<10>(Imm);}], NOOP_SDNodeXForm, timm>;
def immSExt6 : ImmLeaf<i32, [{return isInt<6>(Imm);}]>;
def timmSExt6 : ImmLeaf<i32, [{return isInt<6>(Imm);}], NOOP_SDNodeXForm, timm>;
def immSExt10 : ImmLeaf<i32, [{return isInt<10>(Imm);}]>;
// Mips-specific dsp nodes
@ -306,7 +313,7 @@ class PRECR_SRA_PH_W_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
dag OutOperandList = (outs ROT:$rt);
dag InOperandList = (ins ROS:$rs, uimm5:$sa, ROS:$src);
string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa");
list<dag> Pattern = [(set ROT:$rt, (OpNode ROS:$src, ROS:$rs, immZExt5:$sa))];
list<dag> Pattern = [(set ROT:$rt, (OpNode ROS:$src, ROS:$rs, timmZExt5:$sa))];
InstrItinClass Itinerary = itin;
string Constraints = "$src = $rt";
string BaseOpcode = instr_asm;
@ -443,7 +450,7 @@ class RDDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
dag OutOperandList = (outs GPR32Opnd:$rd);
dag InOperandList = (ins uimm10:$mask);
string AsmString = !strconcat(instr_asm, "\t$rd, $mask");
list<dag> Pattern = [(set GPR32Opnd:$rd, (OpNode immZExt10:$mask))];
list<dag> Pattern = [(set GPR32Opnd:$rd, (OpNode timmZExt10:$mask))];
InstrItinClass Itinerary = itin;
string BaseOpcode = instr_asm;
bit isMoveReg = 1;
@ -454,7 +461,7 @@ class WRDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
dag OutOperandList = (outs);
dag InOperandList = (ins GPR32Opnd:$rs, uimm10:$mask);
string AsmString = !strconcat(instr_asm, "\t$rs, $mask");
list<dag> Pattern = [(OpNode GPR32Opnd:$rs, immZExt10:$mask)];
list<dag> Pattern = [(OpNode GPR32Opnd:$rs, timmZExt10:$mask)];
InstrItinClass Itinerary = itin;
string BaseOpcode = instr_asm;
bit isMoveReg = 1;
@ -1096,14 +1103,14 @@ class SHRLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.ph", int_mips_shrl_ph,
NoItinerary, DSPROpnd>;
// Misc
class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, uimm5, immZExt5,
class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, uimm5, timmZExt5,
NoItinerary>;
class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, uimm2, immZExt2,
class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, uimm2, timmZExt2,
NoItinerary>;
class PREPEND_DESC : APPEND_DESC_BASE<"prepend", int_mips_prepend, uimm5,
immZExt5, NoItinerary>;
timmZExt5, NoItinerary>;
// Pseudos.
def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASE<int_mips_bposge32,

View File

@ -1263,6 +1263,7 @@ def immSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>;
// Node immediate fits as 7-bit zero extended on target immediate.
def immZExt7 : PatLeaf<(imm), [{ return isUInt<7>(N->getZExtValue()); }]>;
def timmZExt7 : PatLeaf<(timm), [{ return isUInt<7>(N->getZExtValue()); }]>;
// Node immediate fits as 16-bit zero extended on target immediate.
// The LO16 param means that only the lower 16 bits of the node
@ -1295,6 +1296,7 @@ def immZExt32 : PatLeaf<(imm), [{ return isUInt<32>(N->getZExtValue()); }]>;
// shamt field must fit in 5 bits.
def immZExt5 : ImmLeaf<i32, [{return Imm == (Imm & 0x1f);}]>;
def timmZExt5 : TImmLeaf<i32, [{return Imm == (Imm & 0x1f);}]>;
def immZExt5Plus1 : PatLeaf<(imm), [{
return isUInt<5>(N->getZExtValue() - 1);

View File

@ -60,6 +60,11 @@ def immZExt2Ptr : ImmLeaf<iPTR, [{return isUInt<2>(Imm);}]>;
def immZExt3Ptr : ImmLeaf<iPTR, [{return isUInt<3>(Imm);}]>;
def immZExt4Ptr : ImmLeaf<iPTR, [{return isUInt<4>(Imm);}]>;
def timmZExt1Ptr : TImmLeaf<iPTR, [{return isUInt<1>(Imm);}]>;
def timmZExt2Ptr : TImmLeaf<iPTR, [{return isUInt<2>(Imm);}]>;
def timmZExt3Ptr : TImmLeaf<iPTR, [{return isUInt<3>(Imm);}]>;
def timmZExt4Ptr : TImmLeaf<iPTR, [{return isUInt<4>(Imm);}]>;
// Operands
def immZExt2Lsa : ImmLeaf<i32, [{return isUInt<2>(Imm - 1);}]>;
@ -1270,7 +1275,7 @@ class MSA_I8_SHF_DESC_BASE<string instr_asm, RegisterOperand ROWD,
dag OutOperandList = (outs ROWD:$wd);
dag InOperandList = (ins ROWS:$ws, uimm8:$u8);
string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $u8");
list<dag> Pattern = [(set ROWD:$wd, (MipsSHF immZExt8:$u8, ROWS:$ws))];
list<dag> Pattern = [(set ROWD:$wd, (MipsSHF timmZExt8:$u8, ROWS:$ws))];
InstrItinClass Itinerary = itin;
}
@ -2299,13 +2304,13 @@ class INSERT_FW_VIDX64_PSEUDO_DESC :
class INSERT_FD_VIDX64_PSEUDO_DESC :
MSA_INSERT_VIDX_PSEUDO_BASE<vector_insert, v2f64, MSA128DOpnd, FGR64Opnd, GPR64Opnd>;
class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", insve_v16i8, uimm4, immZExt4,
class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", insve_v16i8, uimm4, timmZExt4,
MSA128BOpnd>;
class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", insve_v8i16, uimm3, immZExt3,
class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", insve_v8i16, uimm3, timmZExt3,
MSA128HOpnd>;
class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", insve_v4i32, uimm2, immZExt2,
class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", insve_v4i32, uimm2, timmZExt2,
MSA128WOpnd>;
class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", insve_v2i64, uimm1, immZExt1,
class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", insve_v2i64, uimm1, timmZExt1,
MSA128DOpnd>;
class LD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@ -2518,22 +2523,22 @@ class PCNT_W_DESC : MSA_2R_DESC_BASE<"pcnt.w", ctpop, MSA128WOpnd>;
class PCNT_D_DESC : MSA_2R_DESC_BASE<"pcnt.d", ctpop, MSA128DOpnd>;
class SAT_S_B_DESC : MSA_BIT_X_DESC_BASE<"sat_s.b", int_mips_sat_s_b, uimm3,
immZExt3, MSA128BOpnd>;
timmZExt3, MSA128BOpnd>;
class SAT_S_H_DESC : MSA_BIT_X_DESC_BASE<"sat_s.h", int_mips_sat_s_h, uimm4,
immZExt4, MSA128HOpnd>;
timmZExt4, MSA128HOpnd>;
class SAT_S_W_DESC : MSA_BIT_X_DESC_BASE<"sat_s.w", int_mips_sat_s_w, uimm5,
immZExt5, MSA128WOpnd>;
timmZExt5, MSA128WOpnd>;
class SAT_S_D_DESC : MSA_BIT_X_DESC_BASE<"sat_s.d", int_mips_sat_s_d, uimm6,
immZExt6, MSA128DOpnd>;
timmZExt6, MSA128DOpnd>;
class SAT_U_B_DESC : MSA_BIT_X_DESC_BASE<"sat_u.b", int_mips_sat_u_b, uimm3,
immZExt3, MSA128BOpnd>;
timmZExt3, MSA128BOpnd>;
class SAT_U_H_DESC : MSA_BIT_X_DESC_BASE<"sat_u.h", int_mips_sat_u_h, uimm4,
immZExt4, MSA128HOpnd>;
timmZExt4, MSA128HOpnd>;
class SAT_U_W_DESC : MSA_BIT_X_DESC_BASE<"sat_u.w", int_mips_sat_u_w, uimm5,
immZExt5, MSA128WOpnd>;
timmZExt5, MSA128WOpnd>;
class SAT_U_D_DESC : MSA_BIT_X_DESC_BASE<"sat_u.d", int_mips_sat_u_d, uimm6,
immZExt6, MSA128DOpnd>;
timmZExt6, MSA128DOpnd>;
class SHF_B_DESC : MSA_I8_SHF_DESC_BASE<"shf.b", MSA128BOpnd>;
class SHF_H_DESC : MSA_I8_SHF_DESC_BASE<"shf.h", MSA128HOpnd>;
@ -2546,16 +2551,16 @@ class SLD_D_DESC : MSA_3R_SLD_DESC_BASE<"sld.d", int_mips_sld_d, MSA128DOpnd>;
class SLDI_B_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.b", int_mips_sldi_b,
MSA128BOpnd, MSA128BOpnd, uimm4,
immZExt4>;
timmZExt4>;
class SLDI_H_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.h", int_mips_sldi_h,
MSA128HOpnd, MSA128HOpnd, uimm3,
immZExt3>;
timmZExt3>;
class SLDI_W_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.w", int_mips_sldi_w,
MSA128WOpnd, MSA128WOpnd, uimm2,
immZExt2>;
timmZExt2>;
class SLDI_D_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.d", int_mips_sldi_d,
MSA128DOpnd, MSA128DOpnd, uimm1,
immZExt1>;
timmZExt1>;
class SLL_B_DESC : MSA_3R_DESC_BASE<"sll.b", shl, MSA128BOpnd>;
class SLL_H_DESC : MSA_3R_DESC_BASE<"sll.h", shl, MSA128HOpnd>;
@ -2609,13 +2614,13 @@ class SRAR_W_DESC : MSA_3R_DESC_BASE<"srar.w", int_mips_srar_w, MSA128WOpnd>;
class SRAR_D_DESC : MSA_3R_DESC_BASE<"srar.d", int_mips_srar_d, MSA128DOpnd>;
class SRARI_B_DESC : MSA_BIT_X_DESC_BASE<"srari.b", int_mips_srari_b, uimm3,
immZExt3, MSA128BOpnd>;
timmZExt3, MSA128BOpnd>;
class SRARI_H_DESC : MSA_BIT_X_DESC_BASE<"srari.h", int_mips_srari_h, uimm4,
immZExt4, MSA128HOpnd>;
timmZExt4, MSA128HOpnd>;
class SRARI_W_DESC : MSA_BIT_X_DESC_BASE<"srari.w", int_mips_srari_w, uimm5,
immZExt5, MSA128WOpnd>;
timmZExt5, MSA128WOpnd>;
class SRARI_D_DESC : MSA_BIT_X_DESC_BASE<"srari.d", int_mips_srari_d, uimm6,
immZExt6, MSA128DOpnd>;
timmZExt6, MSA128DOpnd>;
class SRL_B_DESC : MSA_3R_DESC_BASE<"srl.b", srl, MSA128BOpnd>;
class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", srl, MSA128HOpnd>;
@ -2637,13 +2642,13 @@ class SRLR_W_DESC : MSA_3R_DESC_BASE<"srlr.w", int_mips_srlr_w, MSA128WOpnd>;
class SRLR_D_DESC : MSA_3R_DESC_BASE<"srlr.d", int_mips_srlr_d, MSA128DOpnd>;
class SRLRI_B_DESC : MSA_BIT_X_DESC_BASE<"srlri.b", int_mips_srlri_b, uimm3,
immZExt3, MSA128BOpnd>;
timmZExt3, MSA128BOpnd>;
class SRLRI_H_DESC : MSA_BIT_X_DESC_BASE<"srlri.h", int_mips_srlri_h, uimm4,
immZExt4, MSA128HOpnd>;
timmZExt4, MSA128HOpnd>;
class SRLRI_W_DESC : MSA_BIT_X_DESC_BASE<"srlri.w", int_mips_srlri_w, uimm5,
immZExt5, MSA128WOpnd>;
timmZExt5, MSA128WOpnd>;
class SRLRI_D_DESC : MSA_BIT_X_DESC_BASE<"srlri.d", int_mips_srlri_d, uimm6,
immZExt6, MSA128DOpnd>;
timmZExt6, MSA128DOpnd>;
class ST_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
ValueType TyNode, RegisterOperand ROWD,

View File

@ -2596,7 +2596,8 @@ static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy,
SDLoc DL(Op);
return DAG.getNode(MipsISD::SHF, DL, ResTy,
DAG.getConstant(Imm, DL, MVT::i32), Op->getOperand(0));
DAG.getTargetConstant(Imm, DL, MVT::i32),
Op->getOperand(0));
}
/// Determine whether a range fits a regular pattern of values.

View File

@ -331,7 +331,7 @@ class VXBX_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
class VXCR_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
: VXForm_CR<xo, (outs vrrc:$vD), (ins vrrc:$vA, u1imm:$ST, u4imm:$SIX),
!strconcat(opc, " $vD, $vA, $ST, $SIX"), IIC_VecFP,
[(set Ty:$vD, (IntID Ty:$vA, imm:$ST, imm:$SIX))]>;
[(set Ty:$vD, (IntID Ty:$vA, timm:$ST, timm:$SIX))]>;
//===----------------------------------------------------------------------===//
// Instruction Definitions.
@ -401,10 +401,10 @@ let isCodeGenOnly = 1 in {
def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins),
"mfvscr $vD", IIC_LdStStore,
[(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>;
[(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>;
def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB),
"mtvscr $vB", IIC_LdStLoad,
[(int_ppc_altivec_mtvscr v4i32:$vB)]>;
[(int_ppc_altivec_mtvscr v4i32:$vB)]>;
let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in { // Loads.
def LVEBX: XForm_1_memOp<31, 7, (outs vrrc:$vD), (ins memrr:$src),

View File

@ -2868,12 +2868,12 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
(outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB),
"xvtstdcsp $XT, $XB, $DCMX", IIC_VecFP,
[(set v4i32: $XT,
(int_ppc_vsx_xvtstdcsp v4f32:$XB, imm:$DCMX))]>;
(int_ppc_vsx_xvtstdcsp v4f32:$XB, timm:$DCMX))]>;
def XVTSTDCDP : XX2_RD6_DCMX7_RS6<60, 15, 5,
(outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB),
"xvtstdcdp $XT, $XB, $DCMX", IIC_VecFP,
[(set v2i64: $XT,
(int_ppc_vsx_xvtstdcdp v2f64:$XB, imm:$DCMX))]>;
(int_ppc_vsx_xvtstdcdp v2f64:$XB, timm:$DCMX))]>;
//===--------------------------------------------------------------------===//

View File

@ -214,12 +214,12 @@ class PseudoMaskedAMOUMinUMax
}
class PseudoMaskedAMOPat<Intrinsic intrin, Pseudo AMOInst>
: Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, imm:$ordering),
: Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering),
(AMOInst GPR:$addr, GPR:$incr, GPR:$mask, imm:$ordering)>;
class PseudoMaskedAMOMinMaxPat<Intrinsic intrin, Pseudo AMOInst>
: Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
imm:$ordering),
timm:$ordering),
(AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
imm:$ordering)>;
@ -288,7 +288,7 @@ def PseudoMaskedCmpXchg32
}
def : Pat<(int_riscv_masked_cmpxchg_i32
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering),
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering),
(PseudoMaskedCmpXchg32
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering)>;
@ -365,7 +365,7 @@ def PseudoCmpXchg64 : PseudoCmpXchg;
defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64>;
def : Pat<(int_riscv_masked_cmpxchg_i64
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering),
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering),
(PseudoMaskedCmpXchg32
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering)>;
} // Predicates = [HasStdExtA, IsRV64]

View File

@ -1146,7 +1146,7 @@ void SystemZDAGToDAGISel::loadVectorConstant(
SDLoc DL(Node);
SmallVector<SDValue, 2> Ops;
for (unsigned OpVal : VCI.OpVals)
Ops.push_back(CurDAG->getConstant(OpVal, DL, MVT::i32));
Ops.push_back(CurDAG->getTargetConstant(OpVal, DL, MVT::i32));
SDValue Op = CurDAG->getNode(VCI.Opcode, DL, VCI.VecVT, Ops);
if (VCI.VecVT == VT.getSimpleVT())
@ -1550,8 +1550,8 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
uint64_t ConstCCMask =
cast<ConstantSDNode>(CCMask.getNode())->getZExtValue();
// Invert the condition.
CCMask = CurDAG->getConstant(ConstCCValid ^ ConstCCMask, SDLoc(Node),
CCMask.getValueType());
CCMask = CurDAG->getTargetConstant(ConstCCValid ^ ConstCCMask,
SDLoc(Node), CCMask.getValueType());
SDValue Op4 = Node->getOperand(4);
SDNode *UpdatedNode =
CurDAG->UpdateNodeOperands(Node, Op1, Op0, CCValid, CCMask, Op4);

View File

@ -2549,12 +2549,12 @@ static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
}
if (C.Opcode == SystemZISD::ICMP)
return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
DAG.getConstant(C.ICmpType, DL, MVT::i32));
DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
if (C.Opcode == SystemZISD::TM) {
bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
DAG.getConstant(RegisterOnly, DL, MVT::i32));
DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
}
return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
}
@ -2592,10 +2592,10 @@ static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
// in CCValid, so other values can be ignored.
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
unsigned CCValid, unsigned CCMask) {
SDValue Ops[] = { DAG.getConstant(1, DL, MVT::i32),
DAG.getConstant(0, DL, MVT::i32),
DAG.getConstant(CCValid, DL, MVT::i32),
DAG.getConstant(CCMask, DL, MVT::i32), CCReg };
SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
DAG.getConstant(0, DL, MVT::i32),
DAG.getTargetConstant(CCValid, DL, MVT::i32),
DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
}
@ -2757,9 +2757,10 @@ SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
SDValue CCReg = emitCmp(DAG, DL, C);
return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(),
Op.getOperand(0), DAG.getConstant(C.CCValid, DL, MVT::i32),
DAG.getConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
return DAG.getNode(
SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
}
// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
@ -2810,8 +2811,9 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
}
SDValue CCReg = emitCmp(DAG, DL, C);
SDValue Ops[] = {TrueOp, FalseOp, DAG.getConstant(C.CCValid, DL, MVT::i32),
DAG.getConstant(C.CCMask, DL, MVT::i32), CCReg};
SDValue Ops[] = {TrueOp, FalseOp,
DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
}
@ -3898,11 +3900,8 @@ SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
SDValue Ops[] = {
Op.getOperand(0),
DAG.getConstant(Code, DL, MVT::i32),
Op.getOperand(1)
};
SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
Op.getOperand(1)};
return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
Node->getVTList(), Ops,
Node->getMemoryVT(), Node->getMemOperand());
@ -4244,7 +4243,7 @@ static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
SDValue Op;
if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
SDValue Op2 = DAG.getConstant(P.Operand, DL, MVT::i32);
SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
} else if (P.Opcode == SystemZISD::PACK) {
MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
@ -4269,7 +4268,8 @@ static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
unsigned StartIndex, OpNo0, OpNo1;
if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
Ops[OpNo1], DAG.getConstant(StartIndex, DL, MVT::i32));
Ops[OpNo1],
DAG.getTargetConstant(StartIndex, DL, MVT::i32));
// Fall back on VPERM. Construct an SDNode for the permute vector.
SDValue IndexNodes[SystemZ::VectorBytes];
@ -4767,7 +4767,7 @@ SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
// Otherwise keep it as a vector-to-vector operation.
return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
DAG.getConstant(Index, DL, MVT::i32));
DAG.getTargetConstant(Index, DL, MVT::i32));
}
GeneralShuffle GS(VT);
@ -6057,8 +6057,8 @@ SDValue SystemZTargetLowering::combineBR_CCMASK(
if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
Chain,
DAG.getConstant(CCValidVal, SDLoc(N), MVT::i32),
DAG.getConstant(CCMaskVal, SDLoc(N), MVT::i32),
DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
N->getOperand(3), CCReg);
return SDValue();
}
@ -6079,10 +6079,9 @@ SDValue SystemZTargetLowering::combineSELECT_CCMASK(
if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
N->getOperand(0),
N->getOperand(1),
DAG.getConstant(CCValidVal, SDLoc(N), MVT::i32),
DAG.getConstant(CCMaskVal, SDLoc(N), MVT::i32),
N->getOperand(0), N->getOperand(1),
DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
CCReg);
return SDValue();
}

View File

@ -2141,17 +2141,17 @@ class FixedCondBranchRXY<CondVariant V, string mnemonic, bits<16> opcode,
}
class CmpBranchRIEa<string mnemonic, bits<16> opcode,
RegisterOperand cls, Immediate imm>
RegisterOperand cls, ImmOpWithPattern imm>
: InstRIEa<opcode, (outs), (ins cls:$R1, imm:$I2, cond4:$M3),
mnemonic#"$M3\t$R1, $I2", []>;
class AsmCmpBranchRIEa<string mnemonic, bits<16> opcode,
RegisterOperand cls, Immediate imm>
RegisterOperand cls, ImmOpWithPattern imm>
: InstRIEa<opcode, (outs), (ins cls:$R1, imm:$I2, imm32zx4:$M3),
mnemonic#"\t$R1, $I2, $M3", []>;
class FixedCmpBranchRIEa<CondVariant V, string mnemonic, bits<16> opcode,
RegisterOperand cls, Immediate imm>
RegisterOperand cls, ImmOpWithPattern imm>
: InstRIEa<opcode, (outs), (ins cls:$R1, imm:$I2),
mnemonic#V.suffix#"\t$R1, $I2", []> {
let isAsmParserOnly = V.alternate;
@ -2159,7 +2159,7 @@ class FixedCmpBranchRIEa<CondVariant V, string mnemonic, bits<16> opcode,
}
multiclass CmpBranchRIEaPair<string mnemonic, bits<16> opcode,
RegisterOperand cls, Immediate imm> {
RegisterOperand cls, ImmOpWithPattern imm> {
let isCodeGenOnly = 1 in
def "" : CmpBranchRIEa<mnemonic, opcode, cls, imm>;
def Asm : AsmCmpBranchRIEa<mnemonic, opcode, cls, imm>;
@ -2193,19 +2193,19 @@ multiclass CmpBranchRIEbPair<string mnemonic, bits<16> opcode,
}
class CmpBranchRIEc<string mnemonic, bits<16> opcode,
RegisterOperand cls, Immediate imm>
RegisterOperand cls, ImmOpWithPattern imm>
: InstRIEc<opcode, (outs),
(ins cls:$R1, imm:$I2, cond4:$M3, brtarget16:$RI4),
mnemonic#"$M3\t$R1, $I2, $RI4", []>;
class AsmCmpBranchRIEc<string mnemonic, bits<16> opcode,
RegisterOperand cls, Immediate imm>
RegisterOperand cls, ImmOpWithPattern imm>
: InstRIEc<opcode, (outs),
(ins cls:$R1, imm:$I2, imm32zx4:$M3, brtarget16:$RI4),
mnemonic#"\t$R1, $I2, $M3, $RI4", []>;
class FixedCmpBranchRIEc<CondVariant V, string mnemonic, bits<16> opcode,
RegisterOperand cls, Immediate imm>
RegisterOperand cls, ImmOpWithPattern imm>
: InstRIEc<opcode, (outs), (ins cls:$R1, imm:$I2, brtarget16:$RI4),
mnemonic#V.suffix#"\t$R1, $I2, $RI4", []> {
let isAsmParserOnly = V.alternate;
@ -2213,7 +2213,7 @@ class FixedCmpBranchRIEc<CondVariant V, string mnemonic, bits<16> opcode,
}
multiclass CmpBranchRIEcPair<string mnemonic, bits<16> opcode,
RegisterOperand cls, Immediate imm> {
RegisterOperand cls, ImmOpWithPattern imm> {
let isCodeGenOnly = 1 in
def "" : CmpBranchRIEc<mnemonic, opcode, cls, imm>;
def Asm : AsmCmpBranchRIEc<mnemonic, opcode, cls, imm>;
@ -2272,19 +2272,19 @@ multiclass CmpBranchRRSPair<string mnemonic, bits<16> opcode,
}
class CmpBranchRIS<string mnemonic, bits<16> opcode,
RegisterOperand cls, Immediate imm>
RegisterOperand cls, ImmOpWithPattern imm>
: InstRIS<opcode, (outs),
(ins cls:$R1, imm:$I2, cond4:$M3, bdaddr12only:$BD4),
mnemonic#"$M3\t$R1, $I2, $BD4", []>;
class AsmCmpBranchRIS<string mnemonic, bits<16> opcode,
RegisterOperand cls, Immediate imm>
RegisterOperand cls, ImmOpWithPattern imm>
: InstRIS<opcode, (outs),
(ins cls:$R1, imm:$I2, imm32zx4:$M3, bdaddr12only:$BD4),
mnemonic#"\t$R1, $I2, $M3, $BD4", []>;
class FixedCmpBranchRIS<CondVariant V, string mnemonic, bits<16> opcode,
RegisterOperand cls, Immediate imm>
RegisterOperand cls, ImmOpWithPattern imm>
: InstRIS<opcode, (outs), (ins cls:$R1, imm:$I2, bdaddr12only:$BD4),
mnemonic#V.suffix#"\t$R1, $I2, $BD4", []> {
let isAsmParserOnly = V.alternate;
@ -2292,7 +2292,7 @@ class FixedCmpBranchRIS<CondVariant V, string mnemonic, bits<16> opcode,
}
multiclass CmpBranchRISPair<string mnemonic, bits<16> opcode,
RegisterOperand cls, Immediate imm> {
RegisterOperand cls, ImmOpWithPattern imm> {
let isCodeGenOnly = 1 in
def "" : CmpBranchRIS<mnemonic, opcode, cls, imm>;
def Asm : AsmCmpBranchRIS<mnemonic, opcode, cls, imm>;
@ -2585,7 +2585,7 @@ multiclass StoreMultipleVRSaAlign<string mnemonic, bits<16> opcode> {
// We therefore match the address in the same way as a normal store and
// only use the StoreSI* instruction if the matched address is suitable.
class StoreSI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
Immediate imm>
ImmOpWithPattern imm>
: InstSI<opcode, (outs), (ins mviaddr12pair:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
[(operator imm:$I2, mviaddr12pair:$BD1)]> {
@ -2593,7 +2593,7 @@ class StoreSI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
}
class StoreSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
Immediate imm>
ImmOpWithPattern imm>
: InstSIY<opcode, (outs), (ins mviaddr20pair:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
[(operator imm:$I2, mviaddr20pair:$BD1)]> {
@ -2601,7 +2601,7 @@ class StoreSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
}
class StoreSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator,
Immediate imm>
ImmOpWithPattern imm>
: InstSIL<opcode, (outs), (ins mviaddr12pair:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
[(operator imm:$I2, mviaddr12pair:$BD1)]> {
@ -2609,7 +2609,7 @@ class StoreSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator,
}
multiclass StoreSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode,
SDPatternOperator operator, Immediate imm> {
SDPatternOperator operator, ImmOpWithPattern imm> {
let DispKey = mnemonic in {
let DispSize = "12" in
def "" : StoreSI<mnemonic, siOpcode, operator, imm>;
@ -2665,7 +2665,7 @@ multiclass CondStoreRSYPair<string mnemonic, bits<16> opcode,
def Asm : AsmCondStoreRSY<mnemonic, opcode, cls, bytes, mode>;
}
class SideEffectUnaryI<string mnemonic, bits<8> opcode, Immediate imm>
class SideEffectUnaryI<string mnemonic, bits<8> opcode, ImmOpWithPattern imm>
: InstI<opcode, (outs), (ins imm:$I1),
mnemonic#"\t$I1", []>;
@ -2761,13 +2761,13 @@ class UnaryMemRRFc<string mnemonic, bits<16> opcode,
}
class UnaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, Immediate imm>
RegisterOperand cls, ImmOpWithPattern imm>
: InstRIa<opcode, (outs cls:$R1), (ins imm:$I2),
mnemonic#"\t$R1, $I2",
[(set cls:$R1, (operator imm:$I2))]>;
class UnaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, Immediate imm>
RegisterOperand cls, ImmOpWithPattern imm>
: InstRILa<opcode, (outs cls:$R1), (ins imm:$I2),
mnemonic#"\t$R1, $I2",
[(set cls:$R1, (operator imm:$I2))]>;
@ -2885,14 +2885,14 @@ multiclass UnaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
}
class UnaryVRIa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr, Immediate imm, bits<4> type = 0>
TypedReg tr, ImmOpWithPattern imm, bits<4> type = 0>
: InstVRIa<opcode, (outs tr.op:$V1), (ins imm:$I2),
mnemonic#"\t$V1, $I2",
[(set (tr.vt tr.op:$V1), (operator imm:$I2))]> {
[(set (tr.vt tr.op:$V1), (operator (i32 timm:$I2)))]> {
let M3 = type;
}
class UnaryVRIaGeneric<string mnemonic, bits<16> opcode, Immediate imm>
class UnaryVRIaGeneric<string mnemonic, bits<16> opcode, ImmOpWithPattern imm>
: InstVRIa<opcode, (outs VR128:$V1), (ins imm:$I2, imm32zx4:$M3),
mnemonic#"\t$V1, $I2, $M3", []>;
@ -3021,7 +3021,7 @@ class SideEffectBinaryRRFc<string mnemonic, bits<16> opcode,
}
class SideEffectBinaryIE<string mnemonic, bits<16> opcode,
Immediate imm1, Immediate imm2>
ImmOpWithPattern imm1, ImmOpWithPattern imm2>
: InstIE<opcode, (outs), (ins imm1:$I1, imm2:$I2),
mnemonic#"\t$I1, $I2", []>;
@ -3030,7 +3030,7 @@ class SideEffectBinarySI<string mnemonic, bits<8> opcode, Operand imm>
mnemonic#"\t$BD1, $I2", []>;
class SideEffectBinarySIL<string mnemonic, bits<16> opcode,
SDPatternOperator operator, Immediate imm>
SDPatternOperator operator, ImmOpWithPattern imm>
: InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2", [(operator bdaddr12only:$BD1, imm:$I2)]>;
@ -3165,7 +3165,7 @@ class BinaryRRFc<string mnemonic, bits<16> opcode,
mnemonic#"\t$R1, $R2, $M3", []>;
class BinaryMemRRFc<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2, Immediate imm>
RegisterOperand cls1, RegisterOperand cls2, ImmOpWithPattern imm>
: InstRRFc<opcode, (outs cls2:$R2, cls1:$R1), (ins cls1:$R1src, imm:$M3),
mnemonic#"\t$R1, $R2, $M3", []> {
let Constraints = "$R1 = $R1src";
@ -3267,7 +3267,7 @@ multiclass CondBinaryRRFaPair<string mnemonic, bits<16> opcode,
}
class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, Immediate imm>
RegisterOperand cls, ImmOpWithPattern imm>
: InstRIa<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
mnemonic#"\t$R1, $I2",
[(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
@ -3276,14 +3276,14 @@ class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
}
class BinaryRIE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls, Immediate imm>
RegisterOperand cls, ImmOpWithPattern imm>
: InstRIEd<opcode, (outs cls:$R1), (ins cls:$R3, imm:$I2),
mnemonic#"\t$R1, $R3, $I2",
[(set cls:$R1, (operator cls:$R3, imm:$I2))]>;
multiclass BinaryRIAndK<string mnemonic, bits<12> opcode1, bits<16> opcode2,
SDPatternOperator operator, RegisterOperand cls,
Immediate imm> {
ImmOpWithPattern imm> {
let NumOpsKey = mnemonic in {
let NumOpsValue = "3" in
def K : BinaryRIE<mnemonic##"k", opcode2, operator, cls, imm>,
@ -3294,7 +3294,7 @@ multiclass BinaryRIAndK<string mnemonic, bits<12> opcode1, bits<16> opcode2,
}
class CondBinaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
Immediate imm>
ImmOpWithPattern imm>
: InstRIEg<opcode, (outs cls:$R1),
(ins cls:$R1src, imm:$I2, cond4:$valid, cond4:$M3),
mnemonic#"$M3\t$R1, $I2",
@ -3308,7 +3308,7 @@ class CondBinaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
// Like CondBinaryRIE, but used for the raw assembly form. The condition-code
// mask is the third operand rather than being part of the mnemonic.
class AsmCondBinaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
Immediate imm>
ImmOpWithPattern imm>
: InstRIEg<opcode, (outs cls:$R1),
(ins cls:$R1src, imm:$I2, imm32zx4:$M3),
mnemonic#"\t$R1, $I2, $M3", []> {
@ -3318,7 +3318,7 @@ class AsmCondBinaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
// Like CondBinaryRIE, but with a fixed CC mask.
class FixedCondBinaryRIE<CondVariant V, string mnemonic, bits<16> opcode,
RegisterOperand cls, Immediate imm>
RegisterOperand cls, ImmOpWithPattern imm>
: InstRIEg<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
mnemonic#V.suffix#"\t$R1, $I2", []> {
let Constraints = "$R1 = $R1src";
@ -3328,14 +3328,14 @@ class FixedCondBinaryRIE<CondVariant V, string mnemonic, bits<16> opcode,
}
multiclass CondBinaryRIEPair<string mnemonic, bits<16> opcode,
RegisterOperand cls, Immediate imm> {
RegisterOperand cls, ImmOpWithPattern imm> {
let isCodeGenOnly = 1 in
def "" : CondBinaryRIE<mnemonic, opcode, cls, imm>;
def Asm : AsmCondBinaryRIE<mnemonic, opcode, cls, imm>;
}
class BinaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, Immediate imm>
RegisterOperand cls, ImmOpWithPattern imm>
: InstRILa<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
mnemonic#"\t$R1, $I2",
[(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
@ -3484,7 +3484,7 @@ class BinaryVRIb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr, bits<4> type>
: InstVRIb<opcode, (outs tr.op:$V1), (ins imm32zx8:$I2, imm32zx8:$I3),
mnemonic#"\t$V1, $I2, $I3",
[(set (tr.vt tr.op:$V1), (operator imm32zx8:$I2, imm32zx8:$I3))]> {
[(set (tr.vt tr.op:$V1), (operator imm32zx8_timm:$I2, imm32zx8_timm:$I3))]> {
let M4 = type;
}
@ -3498,7 +3498,7 @@ class BinaryVRIc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
: InstVRIc<opcode, (outs tr1.op:$V1), (ins tr2.op:$V3, imm32zx16:$I2),
mnemonic#"\t$V1, $V3, $I2",
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V3),
imm32zx16:$I2))]> {
imm32zx16_timm:$I2))]> {
let M4 = type;
}
@ -3512,7 +3512,7 @@ class BinaryVRIe<string mnemonic, bits<16> opcode, SDPatternOperator operator,
: InstVRIe<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, imm32zx12:$I3),
mnemonic#"\t$V1, $V2, $I3",
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2),
imm32zx12:$I3))]> {
imm32zx12_timm:$I3))]> {
let M4 = type;
let M5 = m5;
}
@ -3715,7 +3715,7 @@ class BinaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
: InstVRX<opcode, (outs VR128:$V1), (ins bdxaddr12only:$XBD2, imm32zx4:$M3),
mnemonic#"\t$V1, $XBD2, $M3",
[(set (tr.vt tr.op:$V1), (operator bdxaddr12only:$XBD2,
imm32zx4:$M3))]> {
imm32zx4_timm:$M3))]> {
let mayLoad = 1;
let AccessBytes = bytes;
}
@ -3765,7 +3765,7 @@ class BinaryVSI<string mnemonic, bits<16> opcode, SDPatternOperator operator,
}
class StoreBinaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes,
Immediate index>
ImmOpWithPattern index>
: InstVRV<opcode, (outs), (ins VR128:$V1, bdvaddr12only:$VBD2, index:$M3),
mnemonic#"\t$V1, $VBD2, $M3", []> {
let mayStore = 1;
@ -3774,7 +3774,7 @@ class StoreBinaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes,
class StoreBinaryVRX<string mnemonic, bits<16> opcode,
SDPatternOperator operator, TypedReg tr, bits<5> bytes,
Immediate index>
ImmOpWithPattern index>
: InstVRX<opcode, (outs), (ins tr.op:$V1, bdxaddr12only:$XBD2, index:$M3),
mnemonic#"\t$V1, $XBD2, $M3",
[(operator (tr.vt tr.op:$V1), bdxaddr12only:$XBD2, index:$M3)]> {
@ -3809,7 +3809,7 @@ class CompareRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
}
class CompareRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, Immediate imm>
RegisterOperand cls, ImmOpWithPattern imm>
: InstRIa<opcode, (outs), (ins cls:$R1, imm:$I2),
mnemonic#"\t$R1, $I2",
[(set CC, (operator cls:$R1, imm:$I2))]> {
@ -3817,7 +3817,7 @@ class CompareRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
}
class CompareRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, Immediate imm>
RegisterOperand cls, ImmOpWithPattern imm>
: InstRILa<opcode, (outs), (ins cls:$R1, imm:$I2),
mnemonic#"\t$R1, $I2",
[(set CC, (operator cls:$R1, imm:$I2))]> {
@ -3924,7 +3924,7 @@ class CompareSSb<string mnemonic, bits<8> opcode>
}
class CompareSI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
SDPatternOperator load, Immediate imm,
SDPatternOperator load, ImmOpWithPattern imm,
AddressingMode mode = bdaddr12only>
: InstSI<opcode, (outs), (ins mode:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
@ -3934,7 +3934,7 @@ class CompareSI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
}
class CompareSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator,
SDPatternOperator load, Immediate imm>
SDPatternOperator load, ImmOpWithPattern imm>
: InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
[(set CC, (operator (load bdaddr12only:$BD1), imm:$I2))]> {
@ -3943,7 +3943,7 @@ class CompareSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator,
}
class CompareSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
SDPatternOperator load, Immediate imm,
SDPatternOperator load, ImmOpWithPattern imm,
AddressingMode mode = bdaddr20only>
: InstSIY<opcode, (outs), (ins mode:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
@ -3954,7 +3954,7 @@ class CompareSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
multiclass CompareSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode,
SDPatternOperator operator, SDPatternOperator load,
Immediate imm> {
ImmOpWithPattern imm> {
let DispKey = mnemonic in {
let DispSize = "12" in
def "" : CompareSI<mnemonic, siOpcode, operator, load, imm, bdaddr12pair>;
@ -4012,7 +4012,7 @@ class TestRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
}
class TestBinarySIL<string mnemonic, bits<16> opcode,
SDPatternOperator operator, Immediate imm>
SDPatternOperator operator, ImmOpWithPattern imm>
: InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
[(set CC, (operator bdaddr12only:$BD1, imm:$I2))]>;
@ -4073,7 +4073,7 @@ class SideEffectTernaryMemMemMemRRFb<string mnemonic, bits<16> opcode,
class SideEffectTernaryRRFc<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2,
Immediate imm>
ImmOpWithPattern imm>
: InstRRFc<opcode, (outs), (ins cls1:$R1, cls2:$R2, imm:$M3),
mnemonic#"\t$R1, $R2, $M3", []>;
@ -4086,7 +4086,7 @@ multiclass SideEffectTernaryRRFcOpt<string mnemonic, bits<16> opcode,
class SideEffectTernaryMemMemRRFc<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2,
Immediate imm>
ImmOpWithPattern imm>
: InstRRFc<opcode, (outs cls1:$R1, cls2:$R2),
(ins cls1:$R1src, cls2:$R2src, imm:$M3),
mnemonic#"\t$R1, $R2, $M3", []> {
@ -4221,7 +4221,7 @@ class TernaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator,
}
class TernaryVRIa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, Immediate imm, Immediate index>
TypedReg tr1, TypedReg tr2, ImmOpWithPattern imm, ImmOpWithPattern index>
: InstVRIa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V1src, imm:$I2, index:$M3),
mnemonic#"\t$V1, $I2, $M3",
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V1src),
@ -4237,7 +4237,7 @@ class TernaryVRId<string mnemonic, bits<16> opcode, SDPatternOperator operator,
mnemonic#"\t$V1, $V2, $V3, $I4",
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2),
(tr2.vt tr2.op:$V3),
imm32zx8:$I4))]> {
imm32zx8_timm:$I4))]> {
let M5 = type;
}
@ -4252,8 +4252,8 @@ class TernaryVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
(ins tr2.op:$V2, imm32zx4:$M4, imm32zx4:$M5),
mnemonic#"\t$V1, $V2, $M4, $M5",
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2),
imm32zx4:$M4,
imm32zx4:$M5))],
imm32zx4_timm:$M4,
imm32zx4_timm:$M5))],
m4or> {
let M3 = type;
}
@ -4285,13 +4285,13 @@ multiclass TernaryOptVRRbSPair<string mnemonic, bits<16> opcode,
TypedReg tr1, TypedReg tr2, bits<4> type,
bits<4> modifier = 0> {
def "" : TernaryVRRb<mnemonic, opcode, operator, tr1, tr2, type,
imm32zx4even, !and (modifier, 14)>;
imm32zx4even_timm, !and (modifier, 14)>;
def : InstAlias<mnemonic#"\t$V1, $V2, $V3",
(!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,
tr2.op:$V3, 0)>;
let Defs = [CC] in
def S : TernaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type,
imm32zx4even, !add(!and (modifier, 14), 1)>;
imm32zx4even_timm, !add(!and (modifier, 14), 1)>;
def : InstAlias<mnemonic#"s\t$V1, $V2, $V3",
(!cast<Instruction>(NAME#"S") tr1.op:$V1, tr2.op:$V2,
tr2.op:$V3, 0)>;
@ -4314,7 +4314,7 @@ class TernaryVRRc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
mnemonic#"\t$V1, $V2, $V3, $M4",
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2),
(tr2.vt tr2.op:$V3),
imm32zx4:$M4))]> {
imm32zx4_timm:$M4))]> {
let M5 = 0;
let M6 = 0;
}
@ -4327,7 +4327,7 @@ class TernaryVRRcFloat<string mnemonic, bits<16> opcode,
mnemonic#"\t$V1, $V2, $V3, $M6",
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2),
(tr2.vt tr2.op:$V3),
imm32zx4:$M6))]> {
imm32zx4_timm:$M6))]> {
let M4 = type;
let M5 = m5;
}
@ -4429,7 +4429,7 @@ class TernaryVRSbGeneric<string mnemonic, bits<16> opcode>
}
class TernaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes,
Immediate index>
ImmOpWithPattern index>
: InstVRV<opcode, (outs VR128:$V1),
(ins VR128:$V1src, bdvaddr12only:$VBD2, index:$M3),
mnemonic#"\t$V1, $VBD2, $M3", []> {
@ -4440,7 +4440,7 @@ class TernaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes,
}
class TernaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<5> bytes, Immediate index>
TypedReg tr1, TypedReg tr2, bits<5> bytes, ImmOpWithPattern index>
: InstVRX<opcode, (outs tr1.op:$V1),
(ins tr2.op:$V1src, bdxaddr12only:$XBD2, index:$M3),
mnemonic#"\t$V1, $XBD2, $M3",
@ -4461,7 +4461,7 @@ class QuaternaryVRId<string mnemonic, bits<16> opcode, SDPatternOperator operato
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V1src),
(tr2.vt tr2.op:$V2),
(tr2.vt tr2.op:$V3),
imm32zx8:$I4))]> {
imm32zx8_timm:$I4))]> {
let Constraints = "$V1 = $V1src";
let DisableEncoding = "$V1src";
let M5 = type;
@ -4480,7 +4480,7 @@ class QuaternaryVRIf<string mnemonic, bits<16> opcode>
: InstVRIf<opcode, (outs VR128:$V1),
(ins VR128:$V2, VR128:$V3,
imm32zx8:$I4, imm32zx4:$M5),
mnemonic#"\t$V1, $V2, $V3, $I4, $M5", []>;
mnemonic#"\t$V1, $V2, $V3, $I4, $M5", []>;
class QuaternaryVRIg<string mnemonic, bits<16> opcode>
: InstVRIg<opcode, (outs VR128:$V1),
@ -4491,7 +4491,7 @@ class QuaternaryVRIg<string mnemonic, bits<16> opcode>
class QuaternaryVRRd<string mnemonic, bits<16> opcode,
SDPatternOperator operator, TypedReg tr1, TypedReg tr2,
TypedReg tr3, TypedReg tr4, bits<4> type,
SDPatternOperator m6mask = imm32zx4, bits<4> m6or = 0>
SDPatternOperator m6mask = imm32zx4_timm, bits<4> m6or = 0>
: InstVRRd<opcode, (outs tr1.op:$V1),
(ins tr2.op:$V2, tr3.op:$V3, tr4.op:$V4, m6mask:$M6),
mnemonic#"\t$V1, $V2, $V3, $V4, $M6",
@ -4518,14 +4518,14 @@ multiclass QuaternaryOptVRRdSPair<string mnemonic, bits<16> opcode,
bits<4> modifier = 0> {
def "" : QuaternaryVRRd<mnemonic, opcode, operator,
tr1, tr2, tr2, tr2, type,
imm32zx4even, !and (modifier, 14)>;
imm32zx4even_timm, !and (modifier, 14)>;
def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4",
(!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,
tr2.op:$V3, tr2.op:$V4, 0)>;
let Defs = [CC] in
def S : QuaternaryVRRd<mnemonic##"s", opcode, operator_cc,
tr1, tr2, tr2, tr2, type,
imm32zx4even, !add (!and (modifier, 14), 1)>;
imm32zx4even_timm, !add (!and (modifier, 14), 1)>;
def : InstAlias<mnemonic#"s\t$V1, $V2, $V3, $V4",
(!cast<Instruction>(NAME#"S") tr1.op:$V1, tr2.op:$V2,
tr2.op:$V3, tr2.op:$V4, 0)>;
@ -4536,7 +4536,7 @@ multiclass QuaternaryOptVRRdSPairGeneric<string mnemonic, bits<16> opcode> {
def "" : QuaternaryVRRdGeneric<mnemonic, opcode>;
def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4, $M5",
(!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, VR128:$V3,
VR128:$V4, imm32zx4:$M5, 0)>;
VR128:$V4, imm32zx4_timm:$M5, 0)>;
}
class SideEffectQuaternaryRRFa<string mnemonic, bits<16> opcode,
@ -4638,13 +4638,13 @@ class RotateSelectRIEf<string mnemonic, bits<16> opcode, RegisterOperand cls1,
class PrefetchRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator>
: InstRXYb<opcode, (outs), (ins imm32zx4:$M1, bdxaddr20only:$XBD2),
mnemonic##"\t$M1, $XBD2",
[(operator imm32zx4:$M1, bdxaddr20only:$XBD2)]>;
[(operator imm32zx4_timm:$M1, bdxaddr20only:$XBD2)]>;
class PrefetchRILPC<string mnemonic, bits<12> opcode,
SDPatternOperator operator>
: InstRILc<opcode, (outs), (ins imm32zx4:$M1, pcrel32:$RI2),
: InstRILc<opcode, (outs), (ins imm32zx4_timm:$M1, pcrel32:$RI2),
mnemonic##"\t$M1, $RI2",
[(operator imm32zx4:$M1, pcrel32:$RI2)]> {
[(operator imm32zx4_timm:$M1, pcrel32:$RI2)]> {
// We want PC-relative addresses to be tried ahead of BD and BDX addresses.
// However, BDXs have two extra operands and are therefore 6 units more
// complex.
@ -4691,7 +4691,7 @@ class Pseudo<dag outs, dag ins, list<dag> pattern>
// Like UnaryRI, but expanded after RA depending on the choice of register.
class UnaryRIPseudo<SDPatternOperator operator, RegisterOperand cls,
Immediate imm>
ImmOpWithPattern imm>
: Pseudo<(outs cls:$R1), (ins imm:$I2),
[(set cls:$R1, (operator imm:$I2))]>;
@ -4720,7 +4720,7 @@ class UnaryRRPseudo<string key, SDPatternOperator operator,
// Like BinaryRI, but expanded after RA depending on the choice of register.
class BinaryRIPseudo<SDPatternOperator operator, RegisterOperand cls,
Immediate imm>
ImmOpWithPattern imm>
: Pseudo<(outs cls:$R1), (ins cls:$R1src, imm:$I2),
[(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
let Constraints = "$R1 = $R1src";
@ -4728,13 +4728,13 @@ class BinaryRIPseudo<SDPatternOperator operator, RegisterOperand cls,
// Like BinaryRIE, but expanded after RA depending on the choice of register.
class BinaryRIEPseudo<SDPatternOperator operator, RegisterOperand cls,
Immediate imm>
ImmOpWithPattern imm>
: Pseudo<(outs cls:$R1), (ins cls:$R3, imm:$I2),
[(set cls:$R1, (operator cls:$R3, imm:$I2))]>;
// Like BinaryRIAndK, but expanded after RA depending on the choice of register.
multiclass BinaryRIAndKPseudo<string key, SDPatternOperator operator,
RegisterOperand cls, Immediate imm> {
RegisterOperand cls, ImmOpWithPattern imm> {
let NumOpsKey = key in {
let NumOpsValue = "3" in
def K : BinaryRIEPseudo<operator, cls, imm>,
@ -4764,7 +4764,7 @@ class MemFoldPseudo<string mnemonic, RegisterOperand cls, bits<5> bytes,
// Like CompareRI, but expanded after RA depending on the choice of register.
class CompareRIPseudo<SDPatternOperator operator, RegisterOperand cls,
Immediate imm>
ImmOpWithPattern imm>
: Pseudo<(outs), (ins cls:$R1, imm:$I2),
[(set CC, (operator cls:$R1, imm:$I2))]> {
let isCompare = 1;
@ -4783,7 +4783,7 @@ class CompareRXYPseudo<SDPatternOperator operator, RegisterOperand cls,
}
// Like TestBinarySIL, but expanded later.
class TestBinarySILPseudo<SDPatternOperator operator, Immediate imm>
class TestBinarySILPseudo<SDPatternOperator operator, ImmOpWithPattern imm>
: Pseudo<(outs), (ins bdaddr12only:$BD1, imm:$I2),
[(set CC, (operator bdaddr12only:$BD1, imm:$I2))]>;
@ -4812,7 +4812,7 @@ class CondBinaryRRFaPseudo<RegisterOperand cls1, RegisterOperand cls2,
// Like CondBinaryRIE, but expanded after RA depending on the choice of
// register.
class CondBinaryRIEPseudo<RegisterOperand cls, Immediate imm>
class CondBinaryRIEPseudo<RegisterOperand cls, ImmOpWithPattern imm>
: Pseudo<(outs cls:$R1),
(ins cls:$R1src, imm:$I2, cond4:$valid, cond4:$M3),
[(set cls:$R1, (z_select_ccmask imm:$I2, cls:$R1src,
@ -4876,7 +4876,7 @@ class SelectWrapper<ValueType vt, RegisterOperand cls>
: Pseudo<(outs cls:$dst),
(ins cls:$src1, cls:$src2, imm32zx4:$valid, imm32zx4:$cc),
[(set (vt cls:$dst), (z_select_ccmask cls:$src1, cls:$src2,
imm32zx4:$valid, imm32zx4:$cc))]> {
imm32zx4_timm:$valid, imm32zx4_timm:$cc))]> {
let usesCustomInserter = 1;
let hasNoSchedulingInfo = 1;
let Uses = [CC];
@ -4890,12 +4890,12 @@ multiclass CondStores<RegisterOperand cls, SDPatternOperator store,
def "" : Pseudo<(outs),
(ins cls:$new, mode:$addr, imm32zx4:$valid, imm32zx4:$cc),
[(store (z_select_ccmask cls:$new, (load mode:$addr),
imm32zx4:$valid, imm32zx4:$cc),
imm32zx4_timm:$valid, imm32zx4_timm:$cc),
mode:$addr)]>;
def Inv : Pseudo<(outs),
(ins cls:$new, mode:$addr, imm32zx4:$valid, imm32zx4:$cc),
[(store (z_select_ccmask (load mode:$addr), cls:$new,
imm32zx4:$valid, imm32zx4:$cc),
imm32zx4_timm:$valid, imm32zx4_timm:$cc),
mode:$addr)]>;
}
}
@ -4917,11 +4917,11 @@ class AtomicLoadBinary<SDPatternOperator operator, RegisterOperand cls,
// Specializations of AtomicLoadWBinary.
class AtomicLoadBinaryReg32<SDPatternOperator operator>
: AtomicLoadBinary<operator, GR32, (i32 GR32:$src2), GR32>;
class AtomicLoadBinaryImm32<SDPatternOperator operator, Immediate imm>
class AtomicLoadBinaryImm32<SDPatternOperator operator, ImmOpWithPattern imm>
: AtomicLoadBinary<operator, GR32, (i32 imm:$src2), imm>;
class AtomicLoadBinaryReg64<SDPatternOperator operator>
: AtomicLoadBinary<operator, GR64, (i64 GR64:$src2), GR64>;
class AtomicLoadBinaryImm64<SDPatternOperator operator, Immediate imm>
class AtomicLoadBinaryImm64<SDPatternOperator operator, ImmOpWithPattern imm>
: AtomicLoadBinary<operator, GR64, (i64 imm:$src2), imm>;
// OPERATOR is ATOMIC_SWAPW or an ATOMIC_LOADW_* operation. PAT and OPERAND
@ -4944,7 +4944,7 @@ class AtomicLoadWBinary<SDPatternOperator operator, dag pat,
// Specializations of AtomicLoadWBinary.
class AtomicLoadWBinaryReg<SDPatternOperator operator>
: AtomicLoadWBinary<operator, (i32 GR32:$src2), GR32>;
class AtomicLoadWBinaryImm<SDPatternOperator operator, Immediate imm>
class AtomicLoadWBinaryImm<SDPatternOperator operator, ImmOpWithPattern imm>
: AtomicLoadWBinary<operator, (i32 imm:$src2), imm>;
// A pseudo instruction that is a direct alias of a real instruction.
@ -4979,7 +4979,7 @@ class StoreAliasVRX<SDPatternOperator operator, TypedReg tr,
// An alias of a BinaryRI, but with different register sizes.
class BinaryAliasRI<SDPatternOperator operator, RegisterOperand cls,
Immediate imm>
ImmOpWithPattern imm>
: Alias<4, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
[(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
let Constraints = "$R1 = $R1src";
@ -4987,7 +4987,7 @@ class BinaryAliasRI<SDPatternOperator operator, RegisterOperand cls,
// An alias of a BinaryRIL, but with different register sizes.
class BinaryAliasRIL<SDPatternOperator operator, RegisterOperand cls,
Immediate imm>
ImmOpWithPattern imm>
: Alias<6, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
[(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
let Constraints = "$R1 = $R1src";
@ -4999,7 +4999,7 @@ class BinaryAliasVRRf<RegisterOperand cls>
// An alias of a CompareRI, but with different register sizes.
class CompareAliasRI<SDPatternOperator operator, RegisterOperand cls,
Immediate imm>
ImmOpWithPattern imm>
: Alias<4, (outs), (ins cls:$R1, imm:$I2),
[(set CC, (operator cls:$R1, imm:$I2))]> {
let isCompare = 1;

View File

@ -60,7 +60,7 @@ let Predicates = [FeatureVector] in {
// Generate byte mask.
def VZERO : InherentVRIa<"vzero", 0xE744, 0>;
def VONE : InherentVRIa<"vone", 0xE744, 0xffff>;
def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>;
def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16_timm>;
// Generate mask.
def VGM : BinaryVRIbGeneric<"vgm", 0xE746>;
@ -71,10 +71,10 @@ let Predicates = [FeatureVector] in {
// Replicate immediate.
def VREPI : UnaryVRIaGeneric<"vrepi", 0xE745, imm32sx16>;
def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16, 0>;
def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16, 1>;
def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16, 2>;
def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16, 3>;
def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16_timm, 0>;
def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16_timm, 1>;
def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16_timm, 2>;
def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16_timm, 3>;
}
// Load element immediate.
@ -116,7 +116,7 @@ let Predicates = [FeatureVector] in {
(ins bdxaddr12only:$XBD2, imm32zx4:$M3),
"lcbb\t$R1, $XBD2, $M3",
[(set GR32:$R1, (int_s390_lcbb bdxaddr12only:$XBD2,
imm32zx4:$M3))]>;
imm32zx4_timm:$M3))]>;
// Load with length. The number of loaded bytes is only known at run time.
def VLL : BinaryVRSb<"vll", 0xE737, int_s390_vll, 0>;
@ -362,9 +362,9 @@ let Predicates = [FeatureVector] in {
def VREPH : BinaryVRIc<"vreph", 0xE74D, z_splat, v128h, v128h, 1>;
def VREPF : BinaryVRIc<"vrepf", 0xE74D, z_splat, v128f, v128f, 2>;
def VREPG : BinaryVRIc<"vrepg", 0xE74D, z_splat, v128g, v128g, 3>;
def : Pat<(v4f32 (z_splat VR128:$vec, imm32zx16:$index)),
def : Pat<(v4f32 (z_splat VR128:$vec, imm32zx16_timm:$index)),
(VREPF VR128:$vec, imm32zx16:$index)>;
def : Pat<(v2f64 (z_splat VR128:$vec, imm32zx16:$index)),
def : Pat<(v2f64 (z_splat VR128:$vec, imm32zx16_timm:$index)),
(VREPG VR128:$vec, imm32zx16:$index)>;
// Select.
@ -778,7 +778,7 @@ let Predicates = [FeatureVector] in {
// Shift left double by byte.
def VSLDB : TernaryVRId<"vsldb", 0xE777, z_shl_double, v128b, v128b, 0>;
def : Pat<(int_s390_vsldb VR128:$x, VR128:$y, imm32zx8:$z),
def : Pat<(int_s390_vsldb VR128:$x, VR128:$y, imm32zx8_timm:$z),
(VSLDB VR128:$x, VR128:$y, imm32zx8:$z)>;
// Shift left double by bit.

View File

@ -21,15 +21,32 @@ class ImmediateTLSAsmOperand<string name>
let RenderMethod = "addImmTLSOperands";
}
class ImmediateOp<ValueType vt, string asmop> : Operand<vt> {
let PrintMethod = "print"##asmop##"Operand";
let DecoderMethod = "decode"##asmop##"Operand";
let ParserMatchClass = !cast<AsmOperandClass>(asmop);
}
class ImmOpWithPattern<ValueType vt, string asmop, code pred, SDNodeXForm xform,
SDNode ImmNode = imm> :
ImmediateOp<vt, asmop>, PatLeaf<(vt ImmNode), pred, xform>;
// class ImmediatePatLeaf<ValueType vt, code pred,
// SDNodeXForm xform, SDNode ImmNode>
// : PatLeaf<(vt ImmNode), pred, xform>;
// Constructs both a DAG pattern and instruction operand for an immediate
// of type VT. PRED returns true if a node is acceptable and XFORM returns
// the operand value associated with the node. ASMOP is the name of the
// associated asm operand, and also forms the basis of the asm print method.
class Immediate<ValueType vt, code pred, SDNodeXForm xform, string asmop>
: PatLeaf<(vt imm), pred, xform>, Operand<vt> {
let PrintMethod = "print"##asmop##"Operand";
let DecoderMethod = "decode"##asmop##"Operand";
let ParserMatchClass = !cast<AsmOperandClass>(asmop);
multiclass Immediate<ValueType vt, code pred, SDNodeXForm xform, string asmop> {
// def "" : ImmediateOp<vt, asmop>,
// PatLeaf<(vt imm), pred, xform>;
def "" : ImmOpWithPattern<vt, asmop, pred, xform>;
// def _timm : PatLeaf<(vt timm), pred, xform>;
def _timm : ImmOpWithPattern<vt, asmop, pred, xform, timm>;
}
// Constructs an asm operand for a PC-relative address. SIZE says how
@ -295,87 +312,87 @@ def U48Imm : ImmediateAsmOperand<"U48Imm">;
// Immediates for the lower and upper 16 bits of an i32, with the other
// bits of the i32 being zero.
def imm32ll16 : Immediate<i32, [{
defm imm32ll16 : Immediate<i32, [{
return SystemZ::isImmLL(N->getZExtValue());
}], LL16, "U16Imm">;
def imm32lh16 : Immediate<i32, [{
defm imm32lh16 : Immediate<i32, [{
return SystemZ::isImmLH(N->getZExtValue());
}], LH16, "U16Imm">;
// Immediates for the lower and upper 16 bits of an i32, with the other
// bits of the i32 being one.
def imm32ll16c : Immediate<i32, [{
defm imm32ll16c : Immediate<i32, [{
return SystemZ::isImmLL(uint32_t(~N->getZExtValue()));
}], LL16, "U16Imm">;
def imm32lh16c : Immediate<i32, [{
defm imm32lh16c : Immediate<i32, [{
return SystemZ::isImmLH(uint32_t(~N->getZExtValue()));
}], LH16, "U16Imm">;
// Short immediates
def imm32zx1 : Immediate<i32, [{
defm imm32zx1 : Immediate<i32, [{
return isUInt<1>(N->getZExtValue());
}], NOOP_SDNodeXForm, "U1Imm">;
def imm32zx2 : Immediate<i32, [{
defm imm32zx2 : Immediate<i32, [{
return isUInt<2>(N->getZExtValue());
}], NOOP_SDNodeXForm, "U2Imm">;
def imm32zx3 : Immediate<i32, [{
defm imm32zx3 : Immediate<i32, [{
return isUInt<3>(N->getZExtValue());
}], NOOP_SDNodeXForm, "U3Imm">;
def imm32zx4 : Immediate<i32, [{
defm imm32zx4 : Immediate<i32, [{
return isUInt<4>(N->getZExtValue());
}], NOOP_SDNodeXForm, "U4Imm">;
// Note: this enforces an even value during code generation only.
// When used from the assembler, any 4-bit value is allowed.
def imm32zx4even : Immediate<i32, [{
defm imm32zx4even : Immediate<i32, [{
return isUInt<4>(N->getZExtValue());
}], UIMM8EVEN, "U4Imm">;
def imm32zx6 : Immediate<i32, [{
defm imm32zx6 : Immediate<i32, [{
return isUInt<6>(N->getZExtValue());
}], NOOP_SDNodeXForm, "U6Imm">;
def imm32sx8 : Immediate<i32, [{
defm imm32sx8 : Immediate<i32, [{
return isInt<8>(N->getSExtValue());
}], SIMM8, "S8Imm">;
def imm32zx8 : Immediate<i32, [{
defm imm32zx8 : Immediate<i32, [{
return isUInt<8>(N->getZExtValue());
}], UIMM8, "U8Imm">;
def imm32zx8trunc : Immediate<i32, [{}], UIMM8, "U8Imm">;
defm imm32zx8trunc : Immediate<i32, [{}], UIMM8, "U8Imm">;
def imm32zx12 : Immediate<i32, [{
defm imm32zx12 : Immediate<i32, [{
return isUInt<12>(N->getZExtValue());
}], UIMM12, "U12Imm">;
def imm32sx16 : Immediate<i32, [{
defm imm32sx16 : Immediate<i32, [{
return isInt<16>(N->getSExtValue());
}], SIMM16, "S16Imm">;
def imm32sx16n : Immediate<i32, [{
defm imm32sx16n : Immediate<i32, [{
return isInt<16>(-N->getSExtValue());
}], NEGSIMM16, "S16Imm">;
def imm32zx16 : Immediate<i32, [{
defm imm32zx16 : Immediate<i32, [{
return isUInt<16>(N->getZExtValue());
}], UIMM16, "U16Imm">;
def imm32sx16trunc : Immediate<i32, [{}], SIMM16, "S16Imm">;
def imm32zx16trunc : Immediate<i32, [{}], UIMM16, "U16Imm">;
defm imm32sx16trunc : Immediate<i32, [{}], SIMM16, "S16Imm">;
defm imm32zx16trunc : Immediate<i32, [{}], UIMM16, "U16Imm">;
// Full 32-bit immediates. we need both signed and unsigned versions
// because the assembler is picky. E.g. AFI requires signed operands
// while NILF requires unsigned ones.
def simm32 : Immediate<i32, [{}], SIMM32, "S32Imm">;
def uimm32 : Immediate<i32, [{}], UIMM32, "U32Imm">;
defm simm32 : Immediate<i32, [{}], SIMM32, "S32Imm">;
defm uimm32 : Immediate<i32, [{}], UIMM32, "U32Imm">;
def simm32n : Immediate<i32, [{
defm simm32n : Immediate<i32, [{
return isInt<32>(-N->getSExtValue());
}], NEGSIMM32, "S32Imm">;
@ -387,107 +404,107 @@ def imm32 : ImmLeaf<i32, [{}]>;
// Immediates for 16-bit chunks of an i64, with the other bits of the
// i32 being zero.
def imm64ll16 : Immediate<i64, [{
defm imm64ll16 : Immediate<i64, [{
return SystemZ::isImmLL(N->getZExtValue());
}], LL16, "U16Imm">;
def imm64lh16 : Immediate<i64, [{
defm imm64lh16 : Immediate<i64, [{
return SystemZ::isImmLH(N->getZExtValue());
}], LH16, "U16Imm">;
def imm64hl16 : Immediate<i64, [{
defm imm64hl16 : Immediate<i64, [{
return SystemZ::isImmHL(N->getZExtValue());
}], HL16, "U16Imm">;
def imm64hh16 : Immediate<i64, [{
defm imm64hh16 : Immediate<i64, [{
return SystemZ::isImmHH(N->getZExtValue());
}], HH16, "U16Imm">;
// Immediates for 16-bit chunks of an i64, with the other bits of the
// i32 being one.
def imm64ll16c : Immediate<i64, [{
defm imm64ll16c : Immediate<i64, [{
return SystemZ::isImmLL(uint64_t(~N->getZExtValue()));
}], LL16, "U16Imm">;
def imm64lh16c : Immediate<i64, [{
defm imm64lh16c : Immediate<i64, [{
return SystemZ::isImmLH(uint64_t(~N->getZExtValue()));
}], LH16, "U16Imm">;
def imm64hl16c : Immediate<i64, [{
defm imm64hl16c : Immediate<i64, [{
return SystemZ::isImmHL(uint64_t(~N->getZExtValue()));
}], HL16, "U16Imm">;
def imm64hh16c : Immediate<i64, [{
defm imm64hh16c : Immediate<i64, [{
return SystemZ::isImmHH(uint64_t(~N->getZExtValue()));
}], HH16, "U16Imm">;
// Immediates for the lower and upper 32 bits of an i64, with the other
// bits of the i32 being zero.
def imm64lf32 : Immediate<i64, [{
defm imm64lf32 : Immediate<i64, [{
return SystemZ::isImmLF(N->getZExtValue());
}], LF32, "U32Imm">;
def imm64hf32 : Immediate<i64, [{
defm imm64hf32 : Immediate<i64, [{
return SystemZ::isImmHF(N->getZExtValue());
}], HF32, "U32Imm">;
// Immediates for the lower and upper 32 bits of an i64, with the other
// bits of the i32 being one.
def imm64lf32c : Immediate<i64, [{
defm imm64lf32c : Immediate<i64, [{
return SystemZ::isImmLF(uint64_t(~N->getZExtValue()));
}], LF32, "U32Imm">;
def imm64hf32c : Immediate<i64, [{
defm imm64hf32c : Immediate<i64, [{
return SystemZ::isImmHF(uint64_t(~N->getZExtValue()));
}], HF32, "U32Imm">;
// Negated immediates that fit LF32 or LH16.
def imm64lh16n : Immediate<i64, [{
defm imm64lh16n : Immediate<i64, [{
return SystemZ::isImmLH(uint64_t(-N->getZExtValue()));
}], NEGLH16, "U16Imm">;
def imm64lf32n : Immediate<i64, [{
defm imm64lf32n : Immediate<i64, [{
return SystemZ::isImmLF(uint64_t(-N->getZExtValue()));
}], NEGLF32, "U32Imm">;
// Short immediates.
def imm64sx8 : Immediate<i64, [{
defm imm64sx8 : Immediate<i64, [{
return isInt<8>(N->getSExtValue());
}], SIMM8, "S8Imm">;
def imm64zx8 : Immediate<i64, [{
defm imm64zx8 : Immediate<i64, [{
return isUInt<8>(N->getSExtValue());
}], UIMM8, "U8Imm">;
def imm64sx16 : Immediate<i64, [{
defm imm64sx16 : Immediate<i64, [{
return isInt<16>(N->getSExtValue());
}], SIMM16, "S16Imm">;
def imm64sx16n : Immediate<i64, [{
defm imm64sx16n : Immediate<i64, [{
return isInt<16>(-N->getSExtValue());
}], NEGSIMM16, "S16Imm">;
def imm64zx16 : Immediate<i64, [{
defm imm64zx16 : Immediate<i64, [{
return isUInt<16>(N->getZExtValue());
}], UIMM16, "U16Imm">;
def imm64sx32 : Immediate<i64, [{
defm imm64sx32 : Immediate<i64, [{
return isInt<32>(N->getSExtValue());
}], SIMM32, "S32Imm">;
def imm64sx32n : Immediate<i64, [{
defm imm64sx32n : Immediate<i64, [{
return isInt<32>(-N->getSExtValue());
}], NEGSIMM32, "S32Imm">;
def imm64zx32 : Immediate<i64, [{
defm imm64zx32 : Immediate<i64, [{
return isUInt<32>(N->getZExtValue());
}], UIMM32, "U32Imm">;
def imm64zx32n : Immediate<i64, [{
defm imm64zx32n : Immediate<i64, [{
return isUInt<32>(-N->getSExtValue());
}], NEGUIMM32, "U32Imm">;
def imm64zx48 : Immediate<i64, [{
defm imm64zx48 : Immediate<i64, [{
return isUInt<64>(N->getZExtValue());
}], UIMM48, "U48Imm">;
@ -637,7 +654,7 @@ def bdvaddr12only : BDVMode< "64", "12">;
//===----------------------------------------------------------------------===//
// A 4-bit condition-code mask.
def cond4 : PatLeaf<(i32 imm), [{ return (N->getZExtValue() < 16); }]>,
def cond4 : PatLeaf<(i32 timm), [{ return (N->getZExtValue() < 16); }]>,
Operand<i32> {
let PrintMethod = "printCond4Operand";
}

View File

@ -472,17 +472,17 @@ def z_subcarry : PatFrag<(ops node:$lhs, node:$rhs),
(z_subcarry_1 node:$lhs, node:$rhs, CC)>;
// Signed and unsigned comparisons.
def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{
def z_scmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, timm), [{
unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
return Type != SystemZICMP::UnsignedOnly;
}]>;
def z_ucmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, imm), [{
def z_ucmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, timm), [{
unsigned Type = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
return Type != SystemZICMP::SignedOnly;
}]>;
// Register- and memory-based TEST UNDER MASK.
def z_tm_reg : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, imm)>;
def z_tm_reg : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, timm)>;
def z_tm_mem : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, 0)>;
// Register sign-extend operations. Sub-32-bit values are represented as i32s.

View File

@ -100,12 +100,12 @@ multiclass CondStores64<Instruction insn, Instruction insninv,
SDPatternOperator store, SDPatternOperator load,
AddressingMode mode> {
def : Pat<(store (z_select_ccmask GR64:$new, (load mode:$addr),
imm32zx4:$valid, imm32zx4:$cc),
imm32zx4_timm:$valid, imm32zx4_timm:$cc),
mode:$addr),
(insn (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr,
imm32zx4:$valid, imm32zx4:$cc)>;
def : Pat<(store (z_select_ccmask (load mode:$addr), GR64:$new,
imm32zx4:$valid, imm32zx4:$cc),
imm32zx4_timm:$valid, imm32zx4_timm:$cc),
mode:$addr),
(insninv (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr,
imm32zx4:$valid, imm32zx4:$cc)>;

View File

@ -209,10 +209,10 @@ std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemchr(
// Now select between End and null, depending on whether the character
// was found.
SDValue Ops[] = {End, DAG.getConstant(0, DL, PtrVT),
DAG.getConstant(SystemZ::CCMASK_SRST, DL, MVT::i32),
DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, DL, MVT::i32),
CCReg};
SDValue Ops[] = {
End, DAG.getConstant(0, DL, PtrVT),
DAG.getTargetConstant(SystemZ::CCMASK_SRST, DL, MVT::i32),
DAG.getTargetConstant(SystemZ::CCMASK_SRST_FOUND, DL, MVT::i32), CCReg};
End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, PtrVT, Ops);
return std::make_pair(End, Chain);
}

View File

@ -39,7 +39,7 @@ defm MEMORY_INIT :
(ins i32imm_op:$seg, i32imm_op:$idx, I32:$dest,
I32:$offset, I32:$size),
(outs), (ins i32imm_op:$seg, i32imm_op:$idx),
[(int_wasm_memory_init (i32 imm:$seg), (i32 imm:$idx), I32:$dest,
[(int_wasm_memory_init (i32 timm:$seg), (i32 timm:$idx), I32:$dest,
I32:$offset, I32:$size
)],
"memory.init\t$seg, $idx, $dest, $offset, $size",
@ -48,7 +48,7 @@ defm MEMORY_INIT :
let hasSideEffects = 1 in
defm DATA_DROP :
BULK_I<(outs), (ins i32imm_op:$seg), (outs), (ins i32imm_op:$seg),
[(int_wasm_data_drop (i32 imm:$seg))],
[(int_wasm_data_drop (i32 timm:$seg))],
"data.drop\t$seg", "data.drop\t$seg", 0x09>;
let mayLoad = 1, mayStore = 1 in

View File

@ -879,10 +879,9 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
case ISD::FRINT: Imm = 0x4; break;
}
SDLoc dl(N);
SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl,
N->getValueType(0),
N->getOperand(0),
CurDAG->getConstant(Imm, dl, MVT::i8));
SDValue Res = CurDAG->getNode(
X86ISD::VRNDSCALE, dl, N->getValueType(0), N->getOperand(0),
CurDAG->getTargetConstant(Imm, dl, MVT::i8));
--I;
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
++I;
@ -5096,10 +5095,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
case ISD::FRINT: Imm = 0x4; break;
}
SDLoc dl(Node);
SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl,
Node->getValueType(0),
SDValue Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, Node->getValueType(0),
Node->getOperand(0),
CurDAG->getConstant(Imm, dl, MVT::i8));
CurDAG->getTargetConstant(Imm, dl, MVT::i8));
ReplaceNode(Node, Res.getNode());
SelectCode(Res.getNode());
return;

View File

@ -211,7 +211,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// Integer absolute.
if (Subtarget.hasCMov()) {
setOperationAction(ISD::ABS , MVT::i16 , Custom);
setOperationAction(ISD::ABS , MVT::i32 , Custom);
setOperationAction(ISD::ABS , MVT::i32 , Custom);
}
setOperationAction(ISD::ABS , MVT::i64 , Custom);
@ -4981,7 +4981,7 @@ bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
// Find the type this will be legalized too. Otherwise we might prematurely
// convert this to shl+add/sub and then still have to type legalize those ops.
// Another choice would be to defer the decision for illegal types until
// Another choice would be to defer the decision for illegal types until
// after type legalization. But constant splat vectors of i64 can't make it
// through type legalization on 32-bit targets so we would need to special
// case vXi64.
@ -5759,7 +5759,7 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
if (IdxVal == 0) {
// Zero lower bits of the Vec
SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
SDValue ShiftBits = DAG.getTargetConstant(SubVecNumElems, dl, MVT::i8);
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec,
ZeroIdx);
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
@ -5778,7 +5778,7 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
if (Vec.isUndef()) {
assert(IdxVal != 0 && "Unexpected index");
SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
DAG.getConstant(IdxVal, dl, MVT::i8));
DAG.getTargetConstant(IdxVal, dl, MVT::i8));
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
}
@ -5788,17 +5788,17 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
unsigned ShiftLeft = NumElems - SubVecNumElems;
unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
DAG.getConstant(ShiftLeft, dl, MVT::i8));
DAG.getTargetConstant(ShiftLeft, dl, MVT::i8));
if (ShiftRight != 0)
SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec,
DAG.getConstant(ShiftRight, dl, MVT::i8));
DAG.getTargetConstant(ShiftRight, dl, MVT::i8));
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
}
// Simple case when we put subvector in the upper part
if (IdxVal + SubVecNumElems == NumElems) {
SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
DAG.getConstant(IdxVal, dl, MVT::i8));
DAG.getTargetConstant(IdxVal, dl, MVT::i8));
if (SubVecNumElems * 2 == NumElems) {
// Special case, use legal zero extending insert_subvector. This allows
// isel to opimitize when bits are known zero.
@ -5811,7 +5811,7 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
Undef, Vec, ZeroIdx);
NumElems = WideOpVT.getVectorNumElements();
SDValue ShiftBits = DAG.getConstant(NumElems - IdxVal, dl, MVT::i8);
SDValue ShiftBits = DAG.getTargetConstant(NumElems - IdxVal, dl, MVT::i8);
Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
}
@ -5827,17 +5827,17 @@ static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
// Move the current value of the bit to be replace to the lsbs.
Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
DAG.getConstant(IdxVal, dl, MVT::i8));
DAG.getTargetConstant(IdxVal, dl, MVT::i8));
// Xor with the new bit.
Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Op, SubVec);
// Shift to MSB, filling bottom bits with 0.
unsigned ShiftLeft = NumElems - SubVecNumElems;
Op = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Op,
DAG.getConstant(ShiftLeft, dl, MVT::i8));
DAG.getTargetConstant(ShiftLeft, dl, MVT::i8));
// Shift to the final position, filling upper bits with 0.
unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Op,
DAG.getConstant(ShiftRight, dl, MVT::i8));
DAG.getTargetConstant(ShiftRight, dl, MVT::i8));
// Xor with original vector leaving the new value.
Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Vec, Op);
// Reduce to original width if needed.
@ -7637,7 +7637,7 @@ static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!");
SDLoc DL(Op);
SDValue Result = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
DAG.getIntPtrConstant(InsertPSMask, DL));
DAG.getIntPtrConstant(InsertPSMask, DL, true));
return DAG.getBitcast(VT, Result);
}
@ -7650,7 +7650,7 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits,
unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
SrcOp = DAG.getBitcast(ShVT, SrcOp);
assert(NumBits % 8 == 0 && "Only support byte sized shifts");
SDValue ShiftVal = DAG.getConstant(NumBits/8, dl, MVT::i8);
SDValue ShiftVal = DAG.getTargetConstant(NumBits / 8, dl, MVT::i8);
return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal));
}
@ -9439,9 +9439,9 @@ static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
SDValue HiHi = DAG.getVectorShuffle(MVT::v8f32, DL, SrcVec, SrcVec,
{4, 5, 6, 7, 4, 5, 6, 7});
if (Subtarget.hasXOP())
return DAG.getBitcast(VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v8f32,
LoLo, HiHi, IndicesVec,
DAG.getConstant(0, DL, MVT::i8)));
return DAG.getBitcast(
VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v8f32, LoLo, HiHi,
IndicesVec, DAG.getTargetConstant(0, DL, MVT::i8)));
// Permute Lo and Hi and then select based on index range.
// This works as VPERMILPS only uses index bits[0:1] to permute elements.
SDValue Res = DAG.getSelectCC(
@ -9475,9 +9475,9 @@ static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
// VPERMIL2PD selects with bit#1 of the index vector, so scale IndicesVec.
IndicesVec = DAG.getNode(ISD::ADD, DL, IndicesVT, IndicesVec, IndicesVec);
if (Subtarget.hasXOP())
return DAG.getBitcast(VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v4f64,
LoLo, HiHi, IndicesVec,
DAG.getConstant(0, DL, MVT::i8)));
return DAG.getBitcast(
VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v4f64, LoLo, HiHi,
IndicesVec, DAG.getTargetConstant(0, DL, MVT::i8)));
// Permute Lo and Hi and then select based on index range.
// This works as VPERMILPD only uses index bit[1] to permute elements.
SDValue Res = DAG.getSelectCC(
@ -10048,7 +10048,7 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
DAG.getUNDEF(ShiftVT), SubVec,
DAG.getIntPtrConstant(0, dl));
Op = DAG.getNode(X86ISD::KSHIFTL, dl, ShiftVT, SubVec,
DAG.getConstant(Idx * SubVecNumElts, dl, MVT::i8));
DAG.getTargetConstant(Idx * SubVecNumElts, dl, MVT::i8));
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResVT, Op,
DAG.getIntPtrConstant(0, dl));
}
@ -10441,7 +10441,7 @@ static unsigned getV4X86ShuffleImm(ArrayRef<int> Mask) {
static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, const SDLoc &DL,
SelectionDAG &DAG) {
return DAG.getConstant(getV4X86ShuffleImm(Mask), DL, MVT::i8);
return DAG.getTargetConstant(getV4X86ShuffleImm(Mask), DL, MVT::i8);
}
/// Compute whether each element of a shuffle is zeroable.
@ -11086,7 +11086,7 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
case MVT::v8i16:
assert(Subtarget.hasSSE41() && "128-bit blends require SSE41!");
return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2,
DAG.getConstant(BlendMask, DL, MVT::i8));
DAG.getTargetConstant(BlendMask, DL, MVT::i8));
case MVT::v16i16: {
assert(Subtarget.hasAVX2() && "v16i16 blends require AVX2!");
SmallVector<int, 8> RepeatedMask;
@ -11098,7 +11098,7 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
if (RepeatedMask[i] >= 8)
BlendMask |= 1ull << i;
return DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
DAG.getConstant(BlendMask, DL, MVT::i8));
DAG.getTargetConstant(BlendMask, DL, MVT::i8));
}
// Use PBLENDW for lower/upper lanes and then blend lanes.
// TODO - we should allow 2 PBLENDW here and leave shuffle combine to
@ -11107,9 +11107,9 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
uint64_t HiMask = (BlendMask >> 8) & 0xFF;
if (LoMask == 0 || LoMask == 255 || HiMask == 0 || HiMask == 255) {
SDValue Lo = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
DAG.getConstant(LoMask, DL, MVT::i8));
DAG.getTargetConstant(LoMask, DL, MVT::i8));
SDValue Hi = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
DAG.getConstant(HiMask, DL, MVT::i8));
DAG.getTargetConstant(HiMask, DL, MVT::i8));
return DAG.getVectorShuffle(
MVT::v16i16, DL, Lo, Hi,
{0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31});
@ -11369,7 +11369,7 @@ static SDValue lowerShuffleAsByteRotateAndPermute(
SDValue Rotate = DAG.getBitcast(
VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, DAG.getBitcast(ByteVT, Hi),
DAG.getBitcast(ByteVT, Lo),
DAG.getConstant(Scale * RotAmt, DL, MVT::i8)));
DAG.getTargetConstant(Scale * RotAmt, DL, MVT::i8)));
SmallVector<int, 64> PermMask(NumElts, SM_SentinelUndef);
for (int Lane = 0; Lane != NumElts; Lane += NumEltsPerLane) {
for (int Elt = 0; Elt != NumEltsPerLane; ++Elt) {
@ -11576,7 +11576,7 @@ static SDValue lowerShuffleAsByteRotate(const SDLoc &DL, MVT VT, SDValue V1,
"512-bit PALIGNR requires BWI instructions");
return DAG.getBitcast(
VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, Lo, Hi,
DAG.getConstant(ByteRotation, DL, MVT::i8)));
DAG.getTargetConstant(ByteRotation, DL, MVT::i8)));
}
assert(VT.is128BitVector() &&
@ -11590,10 +11590,12 @@ static SDValue lowerShuffleAsByteRotate(const SDLoc &DL, MVT VT, SDValue V1,
int LoByteShift = 16 - ByteRotation;
int HiByteShift = ByteRotation;
SDValue LoShift = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Lo,
DAG.getConstant(LoByteShift, DL, MVT::i8));
SDValue HiShift = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Hi,
DAG.getConstant(HiByteShift, DL, MVT::i8));
SDValue LoShift =
DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Lo,
DAG.getTargetConstant(LoByteShift, DL, MVT::i8));
SDValue HiShift =
DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Hi,
DAG.getTargetConstant(HiByteShift, DL, MVT::i8));
return DAG.getBitcast(VT,
DAG.getNode(ISD::OR, DL, MVT::v16i8, LoShift, HiShift));
}
@ -11625,7 +11627,7 @@ static SDValue lowerShuffleAsRotate(const SDLoc &DL, MVT VT, SDValue V1,
return SDValue();
return DAG.getNode(X86ISD::VALIGN, DL, VT, Lo, Hi,
DAG.getConstant(Rotation, DL, MVT::i8));
DAG.getTargetConstant(Rotation, DL, MVT::i8));
}
/// Try to lower a vector shuffle as a byte shift sequence.
@ -11664,27 +11666,27 @@ static SDValue lowerVectorShuffleAsByteShiftMask(
if (ZeroLo == 0) {
unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts);
Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
DAG.getConstant(Scale * Shift, DL, MVT::i8));
DAG.getTargetConstant(Scale * Shift, DL, MVT::i8));
Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res,
DAG.getConstant(Scale * ZeroHi, DL, MVT::i8));
DAG.getTargetConstant(Scale * ZeroHi, DL, MVT::i8));
} else if (ZeroHi == 0) {
unsigned Shift = Mask[ZeroLo] % NumElts;
Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res,
DAG.getConstant(Scale * Shift, DL, MVT::i8));
DAG.getTargetConstant(Scale * Shift, DL, MVT::i8));
Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
DAG.getConstant(Scale * ZeroLo, DL, MVT::i8));
DAG.getTargetConstant(Scale * ZeroLo, DL, MVT::i8));
} else if (!Subtarget.hasSSSE3()) {
// If we don't have PSHUFB then its worth avoiding an AND constant mask
// by performing 3 byte shifts. Shuffle combining can kick in above that.
// TODO: There may be some cases where VSH{LR}DQ+PAND is still better.
unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts);
Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
DAG.getConstant(Scale * Shift, DL, MVT::i8));
DAG.getTargetConstant(Scale * Shift, DL, MVT::i8));
Shift += Mask[ZeroLo] % NumElts;
Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res,
DAG.getConstant(Scale * Shift, DL, MVT::i8));
DAG.getTargetConstant(Scale * Shift, DL, MVT::i8));
Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
DAG.getConstant(Scale * ZeroLo, DL, MVT::i8));
DAG.getTargetConstant(Scale * ZeroLo, DL, MVT::i8));
} else
return SDValue();
@ -11806,7 +11808,7 @@ static SDValue lowerShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
"Illegal integer vector type");
V = DAG.getBitcast(ShiftVT, V);
V = DAG.getNode(Opcode, DL, ShiftVT, V,
DAG.getConstant(ShiftAmt, DL, MVT::i8));
DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
return DAG.getBitcast(VT, V);
}
@ -11940,14 +11942,14 @@ static SDValue lowerShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1,
uint64_t BitLen, BitIdx;
if (matchShuffleAsEXTRQ(VT, V1, V2, Mask, BitLen, BitIdx, Zeroable))
return DAG.getNode(X86ISD::EXTRQI, DL, VT, V1,
DAG.getConstant(BitLen, DL, MVT::i8),
DAG.getConstant(BitIdx, DL, MVT::i8));
DAG.getTargetConstant(BitLen, DL, MVT::i8),
DAG.getTargetConstant(BitIdx, DL, MVT::i8));
if (matchShuffleAsINSERTQ(VT, V1, V2, Mask, BitLen, BitIdx))
return DAG.getNode(X86ISD::INSERTQI, DL, VT, V1 ? V1 : DAG.getUNDEF(VT),
V2 ? V2 : DAG.getUNDEF(VT),
DAG.getConstant(BitLen, DL, MVT::i8),
DAG.getConstant(BitIdx, DL, MVT::i8));
DAG.getTargetConstant(BitLen, DL, MVT::i8),
DAG.getTargetConstant(BitIdx, DL, MVT::i8));
return SDValue();
}
@ -12044,8 +12046,8 @@ static SDValue lowerShuffleAsSpecificZeroOrAnyExtend(
int LoIdx = Offset * EltBits;
SDValue Lo = DAG.getBitcast(
MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV,
DAG.getConstant(EltBits, DL, MVT::i8),
DAG.getConstant(LoIdx, DL, MVT::i8)));
DAG.getTargetConstant(EltBits, DL, MVT::i8),
DAG.getTargetConstant(LoIdx, DL, MVT::i8)));
if (isUndefUpperHalf(Mask) || !SafeOffset(Offset + 1))
return DAG.getBitcast(VT, Lo);
@ -12053,8 +12055,8 @@ static SDValue lowerShuffleAsSpecificZeroOrAnyExtend(
int HiIdx = (Offset + 1) * EltBits;
SDValue Hi = DAG.getBitcast(
MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV,
DAG.getConstant(EltBits, DL, MVT::i8),
DAG.getConstant(HiIdx, DL, MVT::i8)));
DAG.getTargetConstant(EltBits, DL, MVT::i8),
DAG.getTargetConstant(HiIdx, DL, MVT::i8)));
return DAG.getBitcast(VT,
DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, Lo, Hi));
}
@ -12364,9 +12366,9 @@ static SDValue lowerShuffleAsElementInsertion(
V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Shuffle);
} else {
V2 = DAG.getBitcast(MVT::v16i8, V2);
V2 = DAG.getNode(
X86ISD::VSHLDQ, DL, MVT::v16i8, V2,
DAG.getConstant(V2Index * EltVT.getSizeInBits() / 8, DL, MVT::i8));
V2 = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, V2,
DAG.getTargetConstant(
V2Index * EltVT.getSizeInBits() / 8, DL, MVT::i8));
V2 = DAG.getBitcast(VT, V2);
}
}
@ -12798,7 +12800,7 @@ static SDValue lowerShuffleAsInsertPS(const SDLoc &DL, SDValue V1, SDValue V2,
// Insert the V2 element into the desired position.
return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
DAG.getConstant(InsertPSMask, DL, MVT::i8));
DAG.getTargetConstant(InsertPSMask, DL, MVT::i8));
}
/// Try to lower a shuffle as a permute of the inputs followed by an
@ -12947,14 +12949,14 @@ static SDValue lowerV2F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// If we have AVX, we can use VPERMILPS which will allow folding a load
// into the shuffle.
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v2f64, V1,
DAG.getConstant(SHUFPDMask, DL, MVT::i8));
DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8));
}
return DAG.getNode(
X86ISD::SHUFP, DL, MVT::v2f64,
Mask[0] == SM_SentinelUndef ? DAG.getUNDEF(MVT::v2f64) : V1,
Mask[1] == SM_SentinelUndef ? DAG.getUNDEF(MVT::v2f64) : V1,
DAG.getConstant(SHUFPDMask, DL, MVT::i8));
DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8));
}
assert(Mask[0] >= 0 && "No undef lanes in multi-input v2 shuffles!");
assert(Mask[1] >= 0 && "No undef lanes in multi-input v2 shuffles!");
@ -13000,7 +13002,7 @@ static SDValue lowerV2F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1);
return DAG.getNode(X86ISD::SHUFP, DL, MVT::v2f64, V1, V2,
DAG.getConstant(SHUFPDMask, DL, MVT::i8));
DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8));
}
/// Handle lowering of 2-lane 64-bit integer shuffles.
@ -14880,8 +14882,8 @@ static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1,
if (WidenedMask[0] < 2 && WidenedMask[1] >= 2) {
unsigned PermMask = ((WidenedMask[0] % 2) << 0) |
((WidenedMask[1] % 2) << 1);
return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2,
DAG.getConstant(PermMask, DL, MVT::i8));
return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2,
DAG.getTargetConstant(PermMask, DL, MVT::i8));
}
}
}
@ -14913,7 +14915,7 @@ static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1,
V2 = DAG.getUNDEF(VT);
return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2,
DAG.getConstant(PermMask, DL, MVT::i8));
DAG.getTargetConstant(PermMask, DL, MVT::i8));
}
/// Lower a vector shuffle by first fixing the 128-bit lanes and then
@ -15542,7 +15544,7 @@ static SDValue lowerShuffleWithSHUFPD(const SDLoc &DL, MVT VT, SDValue V1,
V2 = getZeroVector(VT, Subtarget, DAG, DL);
return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,
DAG.getConstant(Immediate, DL, MVT::i8));
DAG.getTargetConstant(Immediate, DL, MVT::i8));
}
/// Handle lowering of 4-lane 64-bit floating point shuffles.
@ -15577,7 +15579,7 @@ static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
unsigned VPERMILPMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1) |
((Mask[2] == 3) << 2) | ((Mask[3] == 3) << 3);
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f64, V1,
DAG.getConstant(VPERMILPMask, DL, MVT::i8));
DAG.getTargetConstant(VPERMILPMask, DL, MVT::i8));
}
// With AVX2 we have direct support for this permutation.
@ -16316,7 +16318,7 @@ static SDValue lowerV4X128Shuffle(const SDLoc &DL, MVT VT, ArrayRef<int> Mask,
}
return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1],
DAG.getConstant(PermMask, DL, MVT::i8));
DAG.getTargetConstant(PermMask, DL, MVT::i8));
}
/// Handle lowering of 8-lane 64-bit floating point shuffles.
@ -16341,7 +16343,7 @@ static SDValue lowerV8F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
((Mask[4] == 5) << 4) | ((Mask[5] == 5) << 5) |
((Mask[6] == 7) << 6) | ((Mask[7] == 7) << 7);
return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f64, V1,
DAG.getConstant(VPERMILPMask, DL, MVT::i8));
DAG.getTargetConstant(VPERMILPMask, DL, MVT::i8));
}
SmallVector<int, 4> RepeatedMask;
@ -16770,7 +16772,7 @@ static SDValue lower1BitShuffleAsKSHIFTR(const SDLoc &DL, ArrayRef<int> Mask,
DAG.getUNDEF(WideVT), V1,
DAG.getIntPtrConstant(0, DL));
Res = DAG.getNode(X86ISD::KSHIFTR, DL, WideVT, Res,
DAG.getConstant(ShiftAmt, DL, MVT::i8));
DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
DAG.getIntPtrConstant(0, DL));
}
@ -16877,13 +16879,13 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
int WideElts = WideVT.getVectorNumElements();
// Shift left to put the original vector in the MSBs of the new size.
Res = DAG.getNode(X86ISD::KSHIFTL, DL, WideVT, Res,
DAG.getConstant(WideElts - NumElts, DL, MVT::i8));
DAG.getTargetConstant(WideElts - NumElts, DL, MVT::i8));
// Increase the shift amount to account for the left shift.
ShiftAmt += WideElts - NumElts;
}
Res = DAG.getNode(Opcode, DL, WideVT, Res,
DAG.getConstant(ShiftAmt, DL, MVT::i8));
DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
DAG.getIntPtrConstant(0, DL));
}
@ -17331,7 +17333,7 @@ static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG,
// Use kshiftr instruction to move to the lower element.
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec,
DAG.getConstant(IdxVal, dl, MVT::i8));
DAG.getTargetConstant(IdxVal, dl, MVT::i8));
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
DAG.getIntPtrConstant(0, dl));
@ -17559,7 +17561,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
(Subtarget.hasAVX2() && EltVT == MVT::i32)) {
SDValue N1Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1);
return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1Vec,
DAG.getConstant(1, dl, MVT::i8));
DAG.getTargetConstant(1, dl, MVT::i8));
}
}
@ -17635,7 +17637,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
// Create this as a scalar to vector..
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1,
DAG.getConstant(IdxVal << 4, dl, MVT::i8));
DAG.getTargetConstant(IdxVal << 4, dl, MVT::i8));
}
// PINSR* works with constant index.
@ -17721,7 +17723,7 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
// Shift to the LSB.
Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec,
DAG.getConstant(IdxVal, dl, MVT::i8));
DAG.getTargetConstant(IdxVal, dl, MVT::i8));
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, Op.getValueType(), Vec,
DAG.getIntPtrConstant(0, dl));
@ -18264,8 +18266,8 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
APInt APIntShiftAmt;
if (X86::isConstantSplat(Amt, APIntShiftAmt)) {
uint64_t ShiftAmt = APIntShiftAmt.urem(VT.getScalarSizeInBits());
return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT,
Op0, Op1, DAG.getConstant(ShiftAmt, DL, MVT::i8));
return DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, VT, Op0,
Op1, DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
}
return DAG.getNode(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, VT,
@ -18697,7 +18699,7 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
// Low will be bitcasted right away, so do not bother bitcasting back to its
// original type.
Low = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecBitcast,
VecCstLowBitcast, DAG.getConstant(0xaa, DL, MVT::i8));
VecCstLowBitcast, DAG.getTargetConstant(0xaa, DL, MVT::i8));
// uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16),
// (uint4) 0x53000000, 0xaa);
SDValue VecCstHighBitcast = DAG.getBitcast(VecI16VT, VecCstHigh);
@ -18705,7 +18707,7 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
// High will be bitcasted right away, so do not bother bitcasting back to
// its original type.
High = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecShiftBitcast,
VecCstHighBitcast, DAG.getConstant(0xaa, DL, MVT::i8));
VecCstHighBitcast, DAG.getTargetConstant(0xaa, DL, MVT::i8));
} else {
SDValue VecCstMask = DAG.getConstant(0xffff, DL, VecIntVT);
// uint4 lo = (v & (uint4) 0xffff) | (uint4) 0x4b000000;
@ -20655,14 +20657,14 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
}
SDValue Cmp0 = DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getConstant(CC0, dl, MVT::i8));
DAG.getTargetConstant(CC0, dl, MVT::i8));
SDValue Cmp1 = DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getConstant(CC1, dl, MVT::i8));
DAG.getTargetConstant(CC1, dl, MVT::i8));
Cmp = DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
} else {
// Handle all other FP comparisons here.
Cmp = DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getConstant(SSECC, dl, MVT::i8));
DAG.getTargetConstant(SSECC, dl, MVT::i8));
}
// If this is SSE/AVX CMPP, bitcast the result back to integer to match the
@ -20725,7 +20727,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
ISD::isUnsignedIntSetCC(Cond) ? X86ISD::VPCOMU : X86ISD::VPCOM;
return DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getConstant(CmpMode, dl, MVT::i8));
DAG.getTargetConstant(CmpMode, dl, MVT::i8));
}
// (X & Y) != 0 --> (X & Y) == Y iff Y is power-of-2.
@ -21195,15 +21197,16 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
cast<CondCodeSDNode>(Cond.getOperand(2))->get(), CondOp0, CondOp1);
if (Subtarget.hasAVX512()) {
SDValue Cmp = DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CondOp0,
CondOp1, DAG.getConstant(SSECC, DL, MVT::i8));
SDValue Cmp =
DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CondOp0, CondOp1,
DAG.getTargetConstant(SSECC, DL, MVT::i8));
assert(!VT.isVector() && "Not a scalar type?");
return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2);
}
if (SSECC < 8 || Subtarget.hasAVX()) {
SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1,
DAG.getConstant(SSECC, DL, MVT::i8));
DAG.getTargetConstant(SSECC, DL, MVT::i8));
// If we have AVX, we can use a variable vector select (VBLENDV) instead
// of 3 logic instructions for size savings and potentially speed.
@ -21661,7 +21664,7 @@ static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op,
unsigned SignExtShift = DestWidth - InSVT.getSizeInBits();
SignExt = DAG.getNode(X86ISD::VSRAI, dl, DestVT, Curr,
DAG.getConstant(SignExtShift, dl, MVT::i8));
DAG.getTargetConstant(SignExtShift, dl, MVT::i8));
}
if (VT == MVT::v2i64) {
@ -22656,7 +22659,7 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,
}
return DAG.getNode(Opc, dl, VT, SrcOp,
DAG.getConstant(ShiftAmt, dl, MVT::i8));
DAG.getTargetConstant(ShiftAmt, dl, MVT::i8));
}
/// Handle vector element shifts where the shift amount may or may not be a
@ -22701,7 +22704,7 @@ static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt),
MVT::v2i64, ShAmt);
else {
SDValue ByteShift = DAG.getConstant(
SDValue ByteShift = DAG.getTargetConstant(
(128 - AmtTy.getScalarSizeInBits()) / 8, SDLoc(ShAmt), MVT::i8);
ShAmt = DAG.getBitcast(MVT::v16i8, ShAmt);
ShAmt = DAG.getNode(X86ISD::VSHLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt,
@ -22999,9 +23002,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
if (IntrData->Type == INTR_TYPE_3OP_IMM8)
Src3 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src3);
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
@ -23254,7 +23254,6 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case CMP_MASK_CC: {
MVT MaskVT = Op.getSimpleValueType();
SDValue CC = Op.getOperand(3);
CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, CC);
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
@ -23273,7 +23272,7 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case CMP_MASK_SCALAR_CC: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(3));
SDValue CC = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
SDValue Cmp;
@ -23344,10 +23343,10 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue FCmp;
if (isRoundModeCurDirection(Sae))
FCmp = DAG.getNode(X86ISD::FSETCCM, dl, MVT::v1i1, LHS, RHS,
DAG.getConstant(CondVal, dl, MVT::i8));
DAG.getTargetConstant(CondVal, dl, MVT::i8));
else if (isRoundModeSAE(Sae))
FCmp = DAG.getNode(X86ISD::FSETCCM_SAE, dl, MVT::v1i1, LHS, RHS,
DAG.getConstant(CondVal, dl, MVT::i8), Sae);
DAG.getTargetConstant(CondVal, dl, MVT::i8), Sae);
else
return SDValue();
// Need to fill with zeros to ensure the bitcast will produce zeroes
@ -23407,9 +23406,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
assert(IntrData->Opc0 == X86ISD::VRNDSCALE && "Unexpected opcode");
// Clear the upper bits of the rounding immediate so that the legacy
// intrinsic can't trigger the scaling behavior of VRNDSCALE.
SDValue RoundingMode = DAG.getNode(ISD::AND, dl, MVT::i32,
Op.getOperand(2),
DAG.getConstant(0xf, dl, MVT::i32));
auto Round = cast<ConstantSDNode>(Op.getOperand(2));
SDValue RoundingMode =
DAG.getTargetConstant(Round->getZExtValue() & 0xf, dl, MVT::i32);
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
Op.getOperand(1), RoundingMode);
}
@ -23417,9 +23416,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
assert(IntrData->Opc0 == X86ISD::VRNDSCALES && "Unexpected opcode");
// Clear the upper bits of the rounding immediate so that the legacy
// intrinsic can't trigger the scaling behavior of VRNDSCALE.
SDValue RoundingMode = DAG.getNode(ISD::AND, dl, MVT::i32,
Op.getOperand(3),
DAG.getConstant(0xf, dl, MVT::i32));
auto Round = cast<ConstantSDNode>(Op.getOperand(3));
SDValue RoundingMode =
DAG.getTargetConstant(Round->getZExtValue() & 0xf, dl, MVT::i32);
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2), RoundingMode);
}
@ -26096,7 +26095,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
(VT == MVT::v32i8 && Subtarget.hasInt256())) &&
!Subtarget.hasXOP()) {
int NumElts = VT.getVectorNumElements();
SDValue Cst8 = DAG.getConstant(8, dl, MVT::i8);
SDValue Cst8 = DAG.getTargetConstant(8, dl, MVT::i8);
// Extend constant shift amount to vXi16 (it doesn't matter if the type
// isn't legal).
@ -26368,7 +26367,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
unsigned Op = (Opcode == ISD::ROTL ? X86ISD::VROTLI : X86ISD::VROTRI);
uint64_t RotateAmt = EltBits[CstSplatIndex].urem(EltSizeInBits);
return DAG.getNode(Op, DL, VT, R,
DAG.getConstant(RotateAmt, DL, MVT::i8));
DAG.getTargetConstant(RotateAmt, DL, MVT::i8));
}
// Else, fall-back on VPROLV/VPRORV.
@ -26389,7 +26388,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
if (0 <= CstSplatIndex) {
uint64_t RotateAmt = EltBits[CstSplatIndex].urem(EltSizeInBits);
return DAG.getNode(X86ISD::VROTLI, DL, VT, R,
DAG.getConstant(RotateAmt, DL, MVT::i8));
DAG.getTargetConstant(RotateAmt, DL, MVT::i8));
}
// Use general rotate by variable (per-element).
@ -26626,7 +26625,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
// If this is a canonical idempotent atomicrmw w/no uses, we have a better
// lowering available in lowerAtomicArith.
// TODO: push more cases through this path.
// TODO: push more cases through this path.
if (auto *C = dyn_cast<ConstantInt>(AI->getValOperand()))
if (AI->getOperation() == AtomicRMWInst::Or && C->isZero() &&
AI->use_empty())
@ -26696,7 +26695,7 @@ bool X86TargetLowering::lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const {
/// Emit a locked operation on a stack location which does not change any
/// memory location, but does involve a lock prefix. Location is chosen to be
/// a) very likely accessed only by a single thread to minimize cache traffic,
/// and b) definitely dereferenceable. Returns the new Chain result.
/// and b) definitely dereferenceable. Returns the new Chain result.
static SDValue emitLockedStackOp(SelectionDAG &DAG,
const X86Subtarget &Subtarget,
SDValue Chain, SDLoc DL) {
@ -26705,22 +26704,22 @@ static SDValue emitLockedStackOp(SelectionDAG &DAG,
// operations issued by the current processor. As such, the location
// referenced is not relevant for the ordering properties of the instruction.
// See: Intel® 64 and IA-32 ArchitecturesSoftware Developers Manual,
// 8.2.3.9 Loads and Stores Are Not Reordered with Locked Instructions
// 8.2.3.9 Loads and Stores Are Not Reordered with Locked Instructions
// 2) Using an immediate operand appears to be the best encoding choice
// here since it doesn't require an extra register.
// 3) OR appears to be very slightly faster than ADD. (Though, the difference
// is small enough it might just be measurement noise.)
// 4) When choosing offsets, there are several contributing factors:
// a) If there's no redzone, we default to TOS. (We could allocate a cache
// line aligned stack object to improve this case.)
// line aligned stack object to improve this case.)
// b) To minimize our chances of introducing a false dependence, we prefer
// to offset the stack usage from TOS slightly.
// to offset the stack usage from TOS slightly.
// c) To minimize concerns about cross thread stack usage - in particular,
// the idiomatic MyThreadPool.run([&StackVars]() {...}) pattern which
// captures state in the TOS frame and accesses it from many threads -
// we want to use an offset such that the offset is in a distinct cache
// line from the TOS frame.
//
//
// For a general discussion of the tradeoffs and benchmark results, see:
// https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
@ -26773,7 +26772,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget,
if (Subtarget.hasMFence())
return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
SDValue Chain = Op.getOperand(0);
SDValue Chain = Op.getOperand(0);
return emitLockedStackOp(DAG, Subtarget, Chain, dl);
}
@ -27256,12 +27255,12 @@ static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG,
// seq_cst which isn't SingleThread, everything just needs to be preserved
// during codegen and then dropped. Note that we expect (but don't assume),
// that orderings other than seq_cst and acq_rel have been canonicalized to
// a store or load.
// a store or load.
if (AN->getOrdering() == AtomicOrdering::SequentiallyConsistent &&
AN->getSyncScopeID() == SyncScope::System) {
// Prefer a locked operation against a stack location to minimize cache
// traffic. This assumes that stack locations are very likely to be
// accessed only by the owning thread.
// accessed only by the owning thread.
SDValue NewChain = emitLockedStackOp(DAG, Subtarget, Chain, DL);
assert(!N->hasAnyUseOfValue(0));
// NOTE: The getUNDEF is needed to give something for the unused result 0.
@ -32636,7 +32635,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
Res = DAG.getBitcast(ShuffleVT, V1);
Res = DAG.getNode(X86ISD::VPERM2X128, DL, ShuffleVT, Res,
DAG.getUNDEF(ShuffleVT),
DAG.getConstant(PermMask, DL, MVT::i8));
DAG.getTargetConstant(PermMask, DL, MVT::i8));
return DAG.getBitcast(RootVT, Res);
}
@ -32743,7 +32742,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return SDValue(); // Nothing to do!
Res = DAG.getBitcast(ShuffleVT, V1);
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res,
DAG.getConstant(PermuteImm, DL, MVT::i8));
DAG.getTargetConstant(PermuteImm, DL, MVT::i8));
return DAG.getBitcast(RootVT, Res);
}
}
@ -32773,7 +32772,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
NewV1 = DAG.getBitcast(ShuffleVT, NewV1);
NewV2 = DAG.getBitcast(ShuffleVT, NewV2);
Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2,
DAG.getConstant(PermuteImm, DL, MVT::i8));
DAG.getTargetConstant(PermuteImm, DL, MVT::i8));
return DAG.getBitcast(RootVT, Res);
}
@ -32790,8 +32789,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return SDValue(); // Nothing to do!
V1 = DAG.getBitcast(IntMaskVT, V1);
Res = DAG.getNode(X86ISD::EXTRQI, DL, IntMaskVT, V1,
DAG.getConstant(BitLen, DL, MVT::i8),
DAG.getConstant(BitIdx, DL, MVT::i8));
DAG.getTargetConstant(BitLen, DL, MVT::i8),
DAG.getTargetConstant(BitIdx, DL, MVT::i8));
return DAG.getBitcast(RootVT, Res);
}
@ -32801,8 +32800,8 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
V1 = DAG.getBitcast(IntMaskVT, V1);
V2 = DAG.getBitcast(IntMaskVT, V2);
Res = DAG.getNode(X86ISD::INSERTQI, DL, IntMaskVT, V1, V2,
DAG.getConstant(BitLen, DL, MVT::i8),
DAG.getConstant(BitIdx, DL, MVT::i8));
DAG.getTargetConstant(BitLen, DL, MVT::i8),
DAG.getTargetConstant(BitIdx, DL, MVT::i8));
return DAG.getBitcast(RootVT, Res);
}
}
@ -32966,7 +32965,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
V2 = DAG.getBitcast(MaskVT, V2);
SDValue VPerm2MaskOp = getConstVector(VPerm2Idx, IntMaskVT, DAG, DL, true);
Res = DAG.getNode(X86ISD::VPERMIL2, DL, MaskVT, V1, V2, VPerm2MaskOp,
DAG.getConstant(M2ZImm, DL, MVT::i8));
DAG.getTargetConstant(M2ZImm, DL, MVT::i8));
return DAG.getBitcast(RootVT, Res);
}
@ -33785,7 +33784,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
return DAG.getBitcast(
VT, DAG.getNode(X86ISD::BLENDI, DL, SrcVT, N0.getOperand(0),
N1.getOperand(0),
DAG.getConstant(BlendMask, DL, MVT::i8)));
DAG.getTargetConstant(BlendMask, DL, MVT::i8)));
}
}
return SDValue();
@ -33853,12 +33852,12 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
// If we zero out all elements from Op0 then we don't need to reference it.
if (((ZeroMask | (1u << DstIdx)) == 0xF) && !Op0.isUndef())
return DAG.getNode(X86ISD::INSERTPS, DL, VT, DAG.getUNDEF(VT), Op1,
DAG.getConstant(InsertPSMask, DL, MVT::i8));
DAG.getTargetConstant(InsertPSMask, DL, MVT::i8));
// If we zero out the element from Op1 then we don't need to reference it.
if ((ZeroMask & (1u << DstIdx)) && !Op1.isUndef())
return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT),
DAG.getConstant(InsertPSMask, DL, MVT::i8));
DAG.getTargetConstant(InsertPSMask, DL, MVT::i8));
// Attempt to merge insertps Op1 with an inner target shuffle node.
SmallVector<int, 8> TargetMask1;
@ -33869,14 +33868,14 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
// Zero/UNDEF insertion - zero out element and remove dependency.
InsertPSMask |= (1u << DstIdx);
return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT),
DAG.getConstant(InsertPSMask, DL, MVT::i8));
DAG.getTargetConstant(InsertPSMask, DL, MVT::i8));
}
// Update insertps mask srcidx and reference the source input directly.
assert(0 <= M && M < 8 && "Shuffle index out of range");
InsertPSMask = (InsertPSMask & 0x3f) | ((M & 0x3) << 6);
Op1 = Ops1[M < 4 ? 0 : 1];
return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1,
DAG.getConstant(InsertPSMask, DL, MVT::i8));
DAG.getTargetConstant(InsertPSMask, DL, MVT::i8));
}
// Attempt to merge insertps Op0 with an inner target shuffle node.
@ -33919,7 +33918,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
if (Updated)
return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1,
DAG.getConstant(InsertPSMask, DL, MVT::i8));
DAG.getTargetConstant(InsertPSMask, DL, MVT::i8));
}
// If we're inserting an element from a vbroadcast of a load, fold the
@ -33932,7 +33931,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0,
DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT,
Op1.getOperand(0)),
DAG.getConstant(InsertPSMask & 0x3f, DL, MVT::i8));
DAG.getTargetConstant(InsertPSMask & 0x3f, DL, MVT::i8));
return SDValue();
}
@ -34666,7 +34665,7 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
}
SDLoc dl(Op);
SDValue NewSA = TLO.DAG.getConstant(Diff, dl, MVT::i8);
SDValue NewSA = TLO.DAG.getTargetConstant(Diff, dl, MVT::i8);
return TLO.CombineTo(
Op, TLO.DAG.getNode(Opc, dl, VT, Src.getOperand(0), NewSA));
}
@ -34705,7 +34704,7 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
}
SDLoc dl(Op);
SDValue NewSA = TLO.DAG.getConstant(Diff, dl, MVT::i8);
SDValue NewSA = TLO.DAG.getTargetConstant(Diff, dl, MVT::i8);
return TLO.CombineTo(
Op, TLO.DAG.getNode(Opc, dl, VT, Src.getOperand(0), NewSA));
}
@ -35108,7 +35107,7 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
unsigned NewOpc = Diff < 0 ? X86ISD::VSRLI : X86ISD::VSHLI;
SDValue NewShift = TLO.DAG.getNode(
NewOpc, SDLoc(Op), VT, Op0.getOperand(0),
TLO.DAG.getConstant(std::abs(Diff), SDLoc(Op), MVT::i8));
TLO.DAG.getTargetConstant(std::abs(Diff), SDLoc(Op), MVT::i8));
return TLO.CombineTo(Op, NewShift);
}
}
@ -35754,8 +35753,8 @@ static SDValue createMMXBuildVector(BuildVectorSDNode *BV, SelectionDAG &DAG,
unsigned ShufMask = (NumElts > 2 ? 0 : 0x44);
return DAG.getNode(
ISD::INTRINSIC_WO_CHAIN, DL, MVT::x86mmx,
DAG.getConstant(Intrinsic::x86_sse_pshuf_w, DL, MVT::i32), Splat,
DAG.getConstant(ShufMask, DL, MVT::i8));
DAG.getTargetConstant(Intrinsic::x86_sse_pshuf_w, DL, MVT::i32),
Splat, DAG.getTargetConstant(ShufMask, DL, MVT::i8));
}
Ops.append(NumElts, Splat);
} else {
@ -36511,7 +36510,7 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) {
}
// TODO: This switch could include FNEG and the x86-specific FP logic ops
// (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid
// (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid
// missed load folding and fma+fneg combining.
switch (Vec.getOpcode()) {
case ISD::FMA: // Begin 3 operands
@ -38912,7 +38911,7 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
if (NewShiftVal >= NumBitsPerElt)
NewShiftVal = NumBitsPerElt - 1;
return DAG.getNode(X86ISD::VSRAI, SDLoc(N), VT, N0.getOperand(0),
DAG.getConstant(NewShiftVal, SDLoc(N), MVT::i8));
DAG.getTargetConstant(NewShiftVal, SDLoc(N), MVT::i8));
}
// We can decode 'whole byte' logical bit shifts as shuffles.
@ -39032,7 +39031,7 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
if (Subtarget.hasAVX512()) {
SDValue FSetCC =
DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CMP00, CMP01,
DAG.getConstant(x86cc, DL, MVT::i8));
DAG.getTargetConstant(x86cc, DL, MVT::i8));
// Need to fill with zeros to ensure the bitcast will produce zeroes
// for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that.
SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v16i1,
@ -39041,10 +39040,9 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
return DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Ins), DL,
N->getSimpleValueType(0));
}
SDValue OnesOrZeroesF = DAG.getNode(X86ISD::FSETCC, DL,
CMP00.getValueType(), CMP00, CMP01,
DAG.getConstant(x86cc, DL,
MVT::i8));
SDValue OnesOrZeroesF =
DAG.getNode(X86ISD::FSETCC, DL, CMP00.getValueType(), CMP00,
CMP01, DAG.getTargetConstant(x86cc, DL, MVT::i8));
bool is64BitFP = (CMP00.getValueType() == MVT::f64);
MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32;
@ -39238,7 +39236,7 @@ static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG,
SDLoc DL(N);
unsigned ShiftVal = SplatVal.countTrailingOnes();
SDValue ShAmt = DAG.getConstant(EltBitWidth - ShiftVal, DL, MVT::i8);
SDValue ShAmt = DAG.getTargetConstant(EltBitWidth - ShiftVal, DL, MVT::i8);
SDValue Shift = DAG.getNode(X86ISD::VSRLI, DL, VT0, Op0, ShAmt);
return DAG.getBitcast(N->getValueType(0), Shift);
}

View File

@ -753,14 +753,14 @@ let isCommutable = 1 in
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
[(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, timm:$src3))]>,
EVEX_4V, Sched<[SchedWriteFShuffle.XMM]>;
def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
(ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128X:$dst, (X86insertps VR128X:$src1,
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
imm:$src3))]>,
timm:$src3))]>,
EVEX_4V, EVEX_CD8<32, CD8VT1>,
Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
@ -2054,9 +2054,9 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc",
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
(OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
(OpNode_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
imm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>;
timm:$cc)>, EVEX_4V, VEX_LIG, Sched<[sched]>;
let mayLoad = 1 in
defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),
@ -2064,9 +2064,9 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc",
(OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
imm:$cc),
timm:$cc),
(OpNode_su (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2,
imm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
timm:$cc)>, EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
@ -2075,9 +2075,9 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
"vcmp"#_.Suffix,
"$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc",
(OpNodeSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
imm:$cc),
timm:$cc),
(OpNodeSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
imm:$cc)>,
timm:$cc)>,
EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>;
let isCodeGenOnly = 1 in {
@ -2088,7 +2088,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
"\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
[(set _.KRC:$dst, (OpNode _.FRC:$src1,
_.FRC:$src2,
imm:$cc))]>,
timm:$cc))]>,
EVEX_4V, VEX_LIG, Sched<[sched]>;
def rm : AVX512Ii8<0xC2, MRMSrcMem,
(outs _.KRC:$dst),
@ -2097,7 +2097,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeSAE,
"\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
[(set _.KRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2),
imm:$cc))]>,
timm:$cc))]>,
EVEX_4V, VEX_LIG, EVEX_CD8<_.EltSize, CD8VT1>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@ -2530,8 +2530,8 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,u8imm:$cc),
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc",
(X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
(X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
(X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
(X86cmpm_su (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
1>, Sched<[sched]>;
defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
@ -2539,9 +2539,9 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
"vcmp"#_.Suffix,
"$cc, $src2, $src1", "$src1, $src2, $cc",
(X86cmpm (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
imm:$cc),
timm:$cc),
(X86cmpm_su (_.VT _.RC:$src1), (_.VT (_.LdFrag addr:$src2)),
imm:$cc)>,
timm:$cc)>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
@ -2552,10 +2552,10 @@ multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _,
"$src1, ${src2}"#_.BroadcastStr#", $cc",
(X86cmpm (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
imm:$cc),
timm:$cc),
(X86cmpm_su (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
imm:$cc)>,
timm:$cc)>,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
// Patterns for selecting with loads in other operand.
@ -2592,9 +2592,9 @@ multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> {
"vcmp"#_.Suffix,
"$cc, {sae}, $src2, $src1",
"$src1, $src2, {sae}, $cc",
(X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc),
(X86cmpmSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2), timm:$cc),
(X86cmpmSAE_su (_.VT _.RC:$src1), (_.VT _.RC:$src2),
imm:$cc)>,
timm:$cc)>,
EVEX_B, Sched<[sched]>;
}
@ -2649,7 +2649,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(X86Vfpclasss (_.VT _.RC:$src1),
(i32 imm:$src2)))]>,
(i32 timm:$src2)))]>,
Sched<[sched]>;
def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
@ -2657,7 +2657,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(and _.KRCWM:$mask,
(X86Vfpclasss_su (_.VT _.RC:$src1),
(i32 imm:$src2))))]>,
(i32 timm:$src2))))]>,
EVEX_K, Sched<[sched]>;
def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.IntScalarMemOp:$src1, i32u8imm:$src2),
@ -2665,7 +2665,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,
(X86Vfpclasss _.ScalarIntMemCPat:$src1,
(i32 imm:$src2)))]>,
(i32 timm:$src2)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2),
@ -2673,7 +2673,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr,
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(and _.KRCWM:$mask,
(X86Vfpclasss_su _.ScalarIntMemCPat:$src1,
(i32 imm:$src2))))]>,
(i32 timm:$src2))))]>,
EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@ -2689,7 +2689,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(X86Vfpclass (_.VT _.RC:$src1),
(i32 imm:$src2)))]>,
(i32 timm:$src2)))]>,
Sched<[sched]>;
def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
@ -2697,7 +2697,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(and _.KRCWM:$mask,
(X86Vfpclass_su (_.VT _.RC:$src1),
(i32 imm:$src2))))]>,
(i32 timm:$src2))))]>,
EVEX_K, Sched<[sched]>;
def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.MemOp:$src1, i32u8imm:$src2),
@ -2705,7 +2705,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(X86Vfpclass
(_.VT (_.LdFrag addr:$src1)),
(i32 imm:$src2)))]>,
(i32 timm:$src2)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
@ -2713,7 +2713,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
"\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst, (and _.KRCWM:$mask, (X86Vfpclass_su
(_.VT (_.LdFrag addr:$src1)),
(i32 imm:$src2))))]>,
(i32 timm:$src2))))]>,
EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.ScalarMemOp:$src1, i32u8imm:$src2),
@ -2723,7 +2723,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
[(set _.KRC:$dst,(X86Vfpclass
(_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src1))),
(i32 imm:$src2)))]>,
(i32 timm:$src2)))]>,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
@ -2733,7 +2733,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr,
[(set _.KRC:$dst,(and _.KRCWM:$mask, (X86Vfpclass_su
(_.VT (X86VBroadcast
(_.ScalarLdFrag addr:$src1))),
(i32 imm:$src2))))]>,
(i32 timm:$src2))))]>,
EVEX_B, EVEX_K, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@ -3111,7 +3111,7 @@ multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm),
!strconcat(OpcodeStr,
"\t{$imm, $src, $dst|$dst, $src, $imm}"),
[(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>,
[(set KRC:$dst, (OpNode KRC:$src, (i8 timm:$imm)))]>,
Sched<[sched]>;
}
@ -3187,7 +3187,7 @@ multiclass axv512_cmp_packed_cc_no_vlx_lowering<SDNode OpNode, PatFrag OpNode_su
X86VectorVTInfo Narrow,
X86VectorVTInfo Wide> {
def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
(Narrow.VT Narrow.RC:$src2), imm:$cc)),
(Narrow.VT Narrow.RC:$src2), timm:$cc)),
(COPY_TO_REGCLASS
(!cast<Instruction>(InstStr##Zrri)
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
@ -3196,7 +3196,7 @@ def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1),
def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
(OpNode_su (Narrow.VT Narrow.RC:$src1),
(Narrow.VT Narrow.RC:$src2), imm:$cc))),
(Narrow.VT Narrow.RC:$src2), timm:$cc))),
(COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik)
(COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
(Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
@ -5787,13 +5787,13 @@ multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
(ins _.RC:$src1, u8imm:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode _.RC:$src1, (i8 imm:$src2)))>,
(_.VT (OpNode _.RC:$src1, (i8 timm:$src2)))>,
Sched<[sched]>;
defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
(ins _.MemOp:$src1, u8imm:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT (_.LdFrag addr:$src1)),
(i8 imm:$src2)))>,
(i8 timm:$src2)))>,
Sched<[sched.Folded]>;
}
}
@ -5805,7 +5805,7 @@ multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
"$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2)))>,
(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 timm:$src2)))>,
EVEX_B, Sched<[sched.Folded]>;
}
@ -5947,13 +5947,13 @@ let Predicates = [HasAVX512, NoVLX] in {
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
VR128X:$src2)), sub_xmm)>;
def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 imm:$src2))),
def : Pat<(v4i64 (X86vsrai (v4i64 VR256X:$src1), (i8 timm:$src2))),
(EXTRACT_SUBREG (v8i64
(VPSRAQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
imm:$src2)), sub_ymm)>;
def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 imm:$src2))),
def : Pat<(v2i64 (X86vsrai (v2i64 VR128X:$src1), (i8 timm:$src2))),
(EXTRACT_SUBREG (v8i64
(VPSRAQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
@ -6098,23 +6098,23 @@ let Predicates = [HasAVX512, NoVLX] in {
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
sub_ymm)>;
def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 imm:$src2))),
def : Pat<(v2i64 (X86vrotli (v2i64 VR128X:$src1), (i8 timm:$src2))),
(EXTRACT_SUBREG (v8i64
(VPROLQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
imm:$src2)), sub_xmm)>;
def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 imm:$src2))),
def : Pat<(v4i64 (X86vrotli (v4i64 VR256X:$src1), (i8 timm:$src2))),
(EXTRACT_SUBREG (v8i64
(VPROLQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
imm:$src2)), sub_ymm)>;
def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 imm:$src2))),
def : Pat<(v4i32 (X86vrotli (v4i32 VR128X:$src1), (i8 timm:$src2))),
(EXTRACT_SUBREG (v16i32
(VPROLDZri
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
imm:$src2)), sub_xmm)>;
def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 imm:$src2))),
def : Pat<(v8i32 (X86vrotli (v8i32 VR256X:$src1), (i8 timm:$src2))),
(EXTRACT_SUBREG (v16i32
(VPROLDZri
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
@ -6149,23 +6149,23 @@ let Predicates = [HasAVX512, NoVLX] in {
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)))),
sub_ymm)>;
def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 imm:$src2))),
def : Pat<(v2i64 (X86vrotri (v2i64 VR128X:$src1), (i8 timm:$src2))),
(EXTRACT_SUBREG (v8i64
(VPRORQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
imm:$src2)), sub_xmm)>;
def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 imm:$src2))),
def : Pat<(v4i64 (X86vrotri (v4i64 VR256X:$src1), (i8 timm:$src2))),
(EXTRACT_SUBREG (v8i64
(VPRORQZri
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
imm:$src2)), sub_ymm)>;
def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 imm:$src2))),
def : Pat<(v4i32 (X86vrotri (v4i32 VR128X:$src1), (i8 timm:$src2))),
(EXTRACT_SUBREG (v16i32
(VPRORDZri
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR128X:$src1, sub_xmm)),
imm:$src2)), sub_xmm)>;
def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 imm:$src2))),
def : Pat<(v8i32 (X86vrotri (v8i32 VR256X:$src1), (i8 timm:$src2))),
(EXTRACT_SUBREG (v16i32
(VPRORDZri
(v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
@ -8612,21 +8612,21 @@ let ExeDomain = GenericDomain in {
(ins _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _dest.RC:$dst,
(X86cvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2)))]>,
(X86cvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2)))]>,
Sched<[RR]>;
let Constraints = "$src0 = $dst" in
def rrk : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
(ins _dest.RC:$src0, _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _dest.RC:$dst,
(X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2),
(X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
_dest.RC:$src0, _src.KRCWM:$mask))]>,
Sched<[RR]>, EVEX_K;
def rrkz : AVX512AIi8<0x1D, MRMDestReg, (outs _dest.RC:$dst),
(ins _src.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}",
[(set _dest.RC:$dst,
(X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2),
(X86mcvtps2ph (_src.VT _src.RC:$src1), (i32 timm:$src2),
_dest.ImmAllZerosV, _src.KRCWM:$mask))]>,
Sched<[RR]>, EVEX_KZ;
let hasSideEffects = 0, mayStore = 1 in {
@ -9085,14 +9085,14 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 imm:$src3)))>,
(i32 timm:$src3)))>,
Sched<[sched]>;
defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr,
"$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3",
(_.VT (X86RndScalesSAE (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 imm:$src3)))>, EVEX_B,
(i32 timm:$src3)))>, EVEX_B,
Sched<[sched]>;
defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
@ -9100,7 +9100,7 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
OpcodeStr,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86RndScales _.RC:$src1,
_.ScalarIntMemCPat:$src2, (i32 imm:$src3)))>,
_.ScalarIntMemCPat:$src2, (i32 timm:$src3)))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in {
@ -9118,13 +9118,13 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr,
}
let Predicates = [HasAVX512] in {
def : Pat<(X86VRndScale _.FRC:$src1, imm:$src2),
def : Pat<(X86VRndScale _.FRC:$src1, timm:$src2),
(_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)),
_.FRC:$src1, imm:$src2))>;
}
let Predicates = [HasAVX512, OptForSize] in {
def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), imm:$src2),
def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2),
(_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)),
addr:$src1, imm:$src2))>;
}
@ -10145,19 +10145,19 @@ multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNo
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
(OpNode (_.VT _.RC:$src1),
(i32 imm:$src2))>, Sched<[sched]>;
(i32 timm:$src2))>, Sched<[sched]>;
defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
(OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
(i32 imm:$src2))>,
(i32 timm:$src2))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr,
"${src1}"##_.BroadcastStr##", $src2",
(OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
(i32 imm:$src2))>, EVEX_B,
(i32 timm:$src2))>, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@ -10172,7 +10172,7 @@ multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
"$src1, {sae}, $src2",
(OpNode (_.VT _.RC:$src1),
(i32 imm:$src2))>,
(i32 timm:$src2))>,
EVEX_B, Sched<[sched]>;
}
@ -10205,14 +10205,14 @@ multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(i32 imm:$src3))>,
(i32 timm:$src3))>,
Sched<[sched]>;
defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT (bitconvert (_.LdFrag addr:$src2))),
(i32 imm:$src3))>,
(i32 timm:$src3))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
@ -10220,7 +10220,7 @@ multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
"$src1, ${src2}"##_.BroadcastStr##", $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
(i32 imm:$src3))>, EVEX_B,
(i32 timm:$src3))>, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@ -10236,7 +10236,7 @@ multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
(SrcInfo.VT SrcInfo.RC:$src2),
(i8 imm:$src3)))>,
(i8 timm:$src3)))>,
Sched<[sched]>;
defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
(ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
@ -10244,7 +10244,7 @@ multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
(SrcInfo.VT (bitconvert
(SrcInfo.LdFrag addr:$src2))),
(i8 imm:$src3)))>,
(i8 timm:$src3)))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@ -10263,7 +10263,7 @@ multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
"$src1, ${src2}"##_.BroadcastStr##", $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
(i8 imm:$src3))>, EVEX_B,
(i8 timm:$src3))>, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@ -10277,7 +10277,7 @@ multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(i32 imm:$src3))>,
(i32 timm:$src3))>,
Sched<[sched]>;
defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
@ -10301,7 +10301,7 @@ multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
"$src1, $src2, {sae}, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(i32 imm:$src3))>,
(i32 timm:$src3))>,
EVEX_B, Sched<[sched]>;
}
@ -10315,7 +10315,7 @@ multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode
"$src1, $src2, {sae}, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(i32 imm:$src3))>,
(i32 timm:$src3))>,
EVEX_B, Sched<[sched]>;
}
@ -10437,7 +10437,7 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (bitconvert
(CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2,
(i8 imm:$src3)))))>,
(i8 timm:$src3)))))>,
Sched<[sched]>, EVEX2VEXOverride<EVEX2VEXOvrd#"rr">;
defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
@ -10446,7 +10446,7 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
(bitconvert
(CastInfo.VT (X86Shuf128 _.RC:$src1,
(CastInfo.LdFrag addr:$src2),
(i8 imm:$src3)))))>,
(i8 timm:$src3)))))>,
Sched<[sched.Folded, sched.ReadAfterFold]>,
EVEX2VEXOverride<EVEX2VEXOvrd#"rm">;
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
@ -10458,7 +10458,7 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr,
(CastInfo.VT
(X86Shuf128 _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src2)),
(i8 imm:$src3)))))>, EVEX_B,
(i8 timm:$src3)))))>, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@ -10527,14 +10527,14 @@ multiclass avx512_valign<bits<8> opc, string OpcodeStr,
defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 imm:$src3)))>,
(_.VT (X86VAlign _.RC:$src1, _.RC:$src2, (i8 timm:$src3)))>,
Sched<[sched]>, EVEX2VEXOverride<"VPALIGNRrri">;
defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(_.VT (X86VAlign _.RC:$src1,
(bitconvert (_.LdFrag addr:$src2)),
(i8 imm:$src3)))>,
(i8 timm:$src3)))>,
Sched<[sched.Folded, sched.ReadAfterFold]>,
EVEX2VEXOverride<"VPALIGNRrmi">;
@ -10544,7 +10544,7 @@ multiclass avx512_valign<bits<8> opc, string OpcodeStr,
"$src1, ${src2}"##_.BroadcastStr##", $src3",
(X86VAlign _.RC:$src1,
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
(i8 imm:$src3))>, EVEX_B,
(i8 timm:$src3))>, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@ -10593,7 +10593,7 @@ multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
def : Pat<(To.VT (vselect To.KRCWM:$mask,
(bitconvert
(From.VT (OpNode From.RC:$src1, From.RC:$src2,
imm:$src3))),
timm:$src3))),
To.RC:$src0)),
(!cast<Instruction>(OpcodeStr#"rrik") To.RC:$src0, To.KRCWM:$mask,
To.RC:$src1, To.RC:$src2,
@ -10602,7 +10602,7 @@ multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
def : Pat<(To.VT (vselect To.KRCWM:$mask,
(bitconvert
(From.VT (OpNode From.RC:$src1, From.RC:$src2,
imm:$src3))),
timm:$src3))),
To.ImmAllZerosV)),
(!cast<Instruction>(OpcodeStr#"rrikz") To.KRCWM:$mask,
To.RC:$src1, To.RC:$src2,
@ -10612,7 +10612,7 @@ multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
(bitconvert
(From.VT (OpNode From.RC:$src1,
(From.LdFrag addr:$src2),
imm:$src3))),
timm:$src3))),
To.RC:$src0)),
(!cast<Instruction>(OpcodeStr#"rmik") To.RC:$src0, To.KRCWM:$mask,
To.RC:$src1, addr:$src2,
@ -10622,7 +10622,7 @@ multiclass avx512_vpalign_mask_lowering<string OpcodeStr, SDNode OpNode,
(bitconvert
(From.VT (OpNode From.RC:$src1,
(From.LdFrag addr:$src2),
imm:$src3))),
timm:$src3))),
To.ImmAllZerosV)),
(!cast<Instruction>(OpcodeStr#"rmikz") To.KRCWM:$mask,
To.RC:$src1, addr:$src2,
@ -10637,7 +10637,7 @@ multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
def : Pat<(From.VT (OpNode From.RC:$src1,
(bitconvert (To.VT (X86VBroadcast
(To.ScalarLdFrag addr:$src2)))),
imm:$src3)),
timm:$src3)),
(!cast<Instruction>(OpcodeStr#"rmbi") To.RC:$src1, addr:$src2,
(ImmXForm imm:$src3))>;
@ -10647,7 +10647,7 @@ multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
(bitconvert
(To.VT (X86VBroadcast
(To.ScalarLdFrag addr:$src2)))),
imm:$src3))),
timm:$src3))),
To.RC:$src0)),
(!cast<Instruction>(OpcodeStr#"rmbik") To.RC:$src0, To.KRCWM:$mask,
To.RC:$src1, addr:$src2,
@ -10659,7 +10659,7 @@ multiclass avx512_vpalign_mask_lowering_mb<string OpcodeStr, SDNode OpNode,
(bitconvert
(To.VT (X86VBroadcast
(To.ScalarLdFrag addr:$src2)))),
imm:$src3))),
timm:$src3))),
To.ImmAllZerosV)),
(!cast<Instruction>(OpcodeStr#"rmbikz") To.KRCWM:$mask,
To.RC:$src1, addr:$src2,
@ -11103,14 +11103,14 @@ multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr,
def rr : AVX512<opc, MRMr,
(outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>,
[(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 timm:$src2))))]>,
Sched<[sched]>;
def rm : AVX512<opc, MRMm,
(outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst,(_.VT (OpNode
(_.VT (bitconvert (_.LdFrag addr:$src1))),
(i8 imm:$src2))))]>,
(i8 timm:$src2))))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@ -11243,7 +11243,7 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_.VT _.RC:$src3),
(i8 imm:$src4)), 1, 1>,
(i8 timm:$src4)), 1, 1>,
AVX512AIi8Base, EVEX_4V, Sched<[sched]>;
defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
@ -11251,7 +11251,7 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_.VT (bitconvert (_.LdFrag addr:$src3))),
(i8 imm:$src4)), 1, 0>,
(i8 timm:$src4)), 1, 0>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
@ -11261,31 +11261,31 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
(i8 imm:$src4)), 1, 0>, EVEX_B,
(i8 timm:$src4)), 1, 0>, EVEX_B,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}// Constraints = "$src1 = $dst"
// Additional patterns for matching passthru operand in other positions.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
(OpNode _.RC:$src3, _.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, _.RC:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 imm:$src4)),
(OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rrik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, _.RC:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
// Additional patterns for matching loads in other positions.
def : Pat<(_.VT (OpNode (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
_.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
(!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
def : Pat<(_.VT (OpNode _.RC:$src1,
(bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, (i8 imm:$src4))),
_.RC:$src2, (i8 timm:$src4))),
(!cast<Instruction>(Name#_.ZSuffix#rmi) _.RC:$src1, _.RC:$src2,
addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
@ -11293,13 +11293,13 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
// positions.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
_.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
_.ImmAllZerosV)),
(!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, (i8 imm:$src4)),
_.RC:$src2, (i8 timm:$src4)),
_.ImmAllZerosV)),
(!cast<Instruction>(Name#_.ZSuffix#rmikz) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
@ -11308,43 +11308,43 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
// operand orders.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, (i8 imm:$src4)),
_.RC:$src2, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
_.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src2, _.RC:$src1,
(bitconvert (_.LdFrag addr:$src3)), (i8 imm:$src4)),
(bitconvert (_.LdFrag addr:$src3)), (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src2, (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src1, (i8 imm:$src4)),
_.RC:$src1, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (bitconvert (_.LdFrag addr:$src3)),
_.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
_.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
// Additional patterns for matching broadcasts in other positions.
def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
_.RC:$src2, _.RC:$src1, (i8 timm:$src4))),
(!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
def : Pat<(_.VT (OpNode _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src2, (i8 imm:$src4))),
_.RC:$src2, (i8 timm:$src4))),
(!cast<Instruction>(Name#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
@ -11352,7 +11352,7 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
// positions.
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
_.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
_.ImmAllZerosV)),
(!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
_.KRCWM:$mask, _.RC:$src2, addr:$src3,
@ -11360,7 +11360,7 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src2, (i8 imm:$src4)),
_.RC:$src2, (i8 timm:$src4)),
_.ImmAllZerosV)),
(!cast<Instruction>(Name#_.ZSuffix#rmbikz) _.RC:$src1,
_.KRCWM:$mask, _.RC:$src2, addr:$src3,
@ -11371,32 +11371,32 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src2, (i8 imm:$src4)),
_.RC:$src2, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
_.RC:$src2, _.RC:$src1, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src2, _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src3)),
(i8 imm:$src4)), _.RC:$src1)),
(i8 timm:$src4)), _.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode _.RC:$src2,
(X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src1, (i8 imm:$src4)),
_.RC:$src1, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
_.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
_.RC:$src1, _.RC:$src2, (i8 timm:$src4)),
_.RC:$src1)),
(!cast<Instruction>(Name#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
@ -11531,14 +11531,14 @@ multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
(X86VFixupimm (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(TblVT.VT _.RC:$src3),
(i32 imm:$src4))>, Sched<[sched]>;
(i32 timm:$src4))>, Sched<[sched]>;
defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
(X86VFixupimm (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(TblVT.VT (bitconvert (TblVT.LdFrag addr:$src3))),
(i32 imm:$src4))>,
(i32 timm:$src4))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
@ -11547,7 +11547,7 @@ multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr,
(X86VFixupimm (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(TblVT.VT (X86VBroadcast(TblVT.ScalarLdFrag addr:$src3))),
(i32 imm:$src4))>,
(i32 timm:$src4))>,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
} // Constraints = "$src1 = $dst"
}
@ -11564,7 +11564,7 @@ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
(X86VFixupimmSAE (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(TblVT.VT _.RC:$src3),
(i32 imm:$src4))>,
(i32 timm:$src4))>,
EVEX_B, Sched<[sched]>;
}
}
@ -11580,7 +11580,7 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
(X86VFixupimms (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_src3VT.VT _src3VT.RC:$src3),
(i32 imm:$src4))>, Sched<[sched]>;
(i32 timm:$src4))>, Sched<[sched]>;
defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
@ -11588,7 +11588,7 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
(X86VFixupimmSAEs (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_src3VT.VT _src3VT.RC:$src3),
(i32 imm:$src4))>,
(i32 timm:$src4))>,
EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4),
@ -11597,13 +11597,13 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
(_.VT _.RC:$src2),
(_src3VT.VT (scalar_to_vector
(_src3VT.ScalarLdFrag addr:$src3))),
(i32 imm:$src4))>,
(i32 timm:$src4))>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
AVX512VLVectorVTInfo _Vec,
AVX512VLVectorVTInfo _Vec,
AVX512VLVectorVTInfo _Tbl> {
let Predicates = [HasAVX512] in
defm Z : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
@ -12072,7 +12072,7 @@ multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
"$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
(OpNode (VTI.VT VTI.RC:$src1),
(bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))),
(i8 imm:$src3))>, EVEX_B,
(i8 timm:$src3))>, EVEX_B,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}

View File

@ -114,13 +114,13 @@ multiclass ssse3_palign_mm<string asm, Intrinsic IntId,
def rri : MMXSS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2, u8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 imm:$src3)))]>,
[(set VR64:$dst, (IntId VR64:$src1, VR64:$src2, (i8 timm:$src3)))]>,
Sched<[sched]>;
def rmi : MMXSS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2, u8imm:$src3),
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR64:$dst, (IntId VR64:$src1,
(bitconvert (load_mmx addr:$src2)), (i8 imm:$src3)))]>,
(bitconvert (load_mmx addr:$src2)), (i8 timm:$src3)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@ -496,14 +496,14 @@ def MMX_PSHUFWri : MMXIi8<0x70, MRMSrcReg,
(outs VR64:$dst), (ins VR64:$src1, u8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
(int_x86_sse_pshuf_w VR64:$src1, imm:$src2))]>,
(int_x86_sse_pshuf_w VR64:$src1, timm:$src2))]>,
Sched<[SchedWriteShuffle.MMX]>;
def MMX_PSHUFWmi : MMXIi8<0x70, MRMSrcMem,
(outs VR64:$dst), (ins i64mem:$src1, u8imm:$src2),
"pshufw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR64:$dst,
(int_x86_sse_pshuf_w (load_mmx addr:$src1),
imm:$src2))]>,
timm:$src2))]>,
Sched<[SchedWriteShuffle.MMX.Folded]>;
// -- Conversion Instructions

View File

@ -370,7 +370,7 @@ defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, "movapd
defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, "movups",
SSEPackedSingle, SchedWriteFMoveLS.YMM>,
PS, VEX, VEX_L, VEX_WIG;
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd",
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd",
SSEPackedDouble, SchedWriteFMoveLS.YMM>,
PD, VEX, VEX_L, VEX_WIG;
}
@ -1728,12 +1728,12 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
let isCommutable = 1 in
def rr : SIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
[(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))]>,
[(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, timm:$cc))]>,
Sched<[sched]>;
def rm : SIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm,
[(set RC:$dst, (OpNode (VT RC:$src1),
(ld_frag addr:$src2), imm:$cc))]>,
(ld_frag addr:$src2), timm:$cc))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@ -1766,13 +1766,13 @@ multiclass sse12_cmp_scalar_int<Operand memop,
def rr_Int : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src, u8imm:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
VR128:$src, imm:$cc))]>,
VR128:$src, timm:$cc))]>,
Sched<[sched]>;
let mayLoad = 1 in
def rm_Int : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, memop:$src, u8imm:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
mem_cpat:$src, imm:$cc))]>,
mem_cpat:$src, timm:$cc))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@ -1891,12 +1891,12 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
let isCommutable = 1 in
def rri : PIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), asm,
[(set RC:$dst, (VT (X86cmpp RC:$src1, RC:$src2, imm:$cc)))], d>,
[(set RC:$dst, (VT (X86cmpp RC:$src1, RC:$src2, timm:$cc)))], d>,
Sched<[sched]>;
def rmi : PIi8<0xC2, MRMSrcMem,
(outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm,
[(set RC:$dst,
(VT (X86cmpp RC:$src1, (ld_frag addr:$src2), imm:$cc)))], d>,
(VT (X86cmpp RC:$src1, (ld_frag addr:$src2), timm:$cc)))], d>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@ -1921,7 +1921,7 @@ let Constraints = "$src1 = $dst" in {
SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, memopv2f64>, PD;
}
def CommutableCMPCC : PatLeaf<(imm), [{
def CommutableCMPCC : PatLeaf<(timm), [{
uint64_t Imm = N->getZExtValue() & 0x7;
return (Imm == 0x00 || Imm == 0x03 || Imm == 0x04 || Imm == 0x07);
}]>;
@ -1985,13 +1985,13 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, u8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
(i8 imm:$src3))))], d>,
(i8 timm:$src3))))], d>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
let isCommutable = IsCommutable in
def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, u8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
(i8 imm:$src3))))], d>,
(i8 timm:$src3))))], d>,
Sched<[sched]>;
}
@ -2736,7 +2736,7 @@ defm : scalar_math_patterns<fadd, "ADDSD", X86Movsd, v2f64, f64, FR64, loadf64,
defm : scalar_math_patterns<fsub, "SUBSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
defm : scalar_math_patterns<fmul, "MULSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
defm : scalar_math_patterns<fdiv, "DIVSD", X86Movsd, v2f64, f64, FR64, loadf64, UseSSE2>;
/// Unop Arithmetic
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation. This form is unlike the
@ -3497,7 +3497,7 @@ multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i8 imm:$src2))))]>,
[(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i8 timm:$src2))))]>,
Sched<[schedImm]>;
}
@ -3529,7 +3529,7 @@ multiclass PDI_binop_ri<bits<8> opc, Format ImmForm, string OpcodeStr,
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst, (VT (OpNode RC:$src1, (i8 imm:$src2))))]>,
[(set RC:$dst, (VT (OpNode RC:$src1, (i8 timm:$src2))))]>,
Sched<[sched]>;
}
@ -3612,7 +3612,7 @@ let Predicates = [HasAVX, prd] in {
!strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode VR128:$src1, (i8 imm:$src2))))]>,
(vt128 (OpNode VR128:$src1, (i8 timm:$src2))))]>,
VEX, Sched<[sched.XMM]>, VEX_WIG;
def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, u8imm:$src2),
@ -3620,7 +3620,7 @@ let Predicates = [HasAVX, prd] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (load addr:$src1),
(i8 imm:$src2))))]>, VEX,
(i8 timm:$src2))))]>, VEX,
Sched<[sched.XMM.Folded]>, VEX_WIG;
}
@ -3630,7 +3630,7 @@ let Predicates = [HasAVX2, prd] in {
!strconcat("v", OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode VR256:$src1, (i8 imm:$src2))))]>,
(vt256 (OpNode VR256:$src1, (i8 timm:$src2))))]>,
VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG;
def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst),
(ins i256mem:$src1, u8imm:$src2),
@ -3638,7 +3638,7 @@ let Predicates = [HasAVX2, prd] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode (load addr:$src1),
(i8 imm:$src2))))]>, VEX, VEX_L,
(i8 timm:$src2))))]>, VEX, VEX_L,
Sched<[sched.YMM.Folded]>, VEX_WIG;
}
@ -3648,7 +3648,7 @@ let Predicates = [UseSSE2] in {
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode VR128:$src1, (i8 imm:$src2))))]>,
(vt128 (OpNode VR128:$src1, (i8 timm:$src2))))]>,
Sched<[sched.XMM]>;
def mi : Ii8<0x70, MRMSrcMem,
(outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2),
@ -3656,7 +3656,7 @@ let Predicates = [UseSSE2] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (memop addr:$src1),
(i8 imm:$src2))))]>,
(i8 timm:$src2))))]>,
Sched<[sched.XMM.Folded]>;
}
}
@ -4827,7 +4827,7 @@ multiclass ssse3_palignr<string asm, ValueType VT, RegisterClass RC,
!strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst, (VT (X86PAlignr RC:$src1, RC:$src2, (i8 imm:$src3))))]>,
[(set RC:$dst, (VT (X86PAlignr RC:$src1, RC:$src2, (i8 timm:$src3))))]>,
Sched<[sched]>;
let mayLoad = 1 in
def rmi : SS3AI<0x0F, MRMSrcMem, (outs RC:$dst),
@ -4838,7 +4838,7 @@ multiclass ssse3_palignr<string asm, ValueType VT, RegisterClass RC,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst, (VT (X86PAlignr RC:$src1,
(memop_frag addr:$src2),
(i8 imm:$src3))))]>,
(i8 timm:$src3))))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
@ -5315,7 +5315,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
(X86insertps VR128:$src1, VR128:$src2, imm:$src3))]>,
(X86insertps VR128:$src1, VR128:$src2, timm:$src3))]>,
Sched<[SchedWriteFShuffle.XMM]>;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f32mem:$src2, u8imm:$src3),
@ -5326,7 +5326,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
[(set VR128:$dst,
(X86insertps VR128:$src1,
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
imm:$src3))]>,
timm:$src3))]>,
Sched<[SchedWriteFShuffle.XMM.Folded, SchedWriteFShuffle.XMM.ReadAfterFold]>;
}
@ -5352,7 +5352,7 @@ multiclass sse41_fp_unop_p<bits<8> opc, string OpcodeStr,
(outs RC:$dst), (ins RC:$src1, i32u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (VT (OpNode RC:$src1, imm:$src2)))]>,
[(set RC:$dst, (VT (OpNode RC:$src1, timm:$src2)))]>,
Sched<[sched]>;
// Vector intrinsic operation, mem
@ -5361,7 +5361,7 @@ multiclass sse41_fp_unop_p<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
(VT (OpNode (mem_frag addr:$src1),imm:$src2)))]>,
(VT (OpNode (mem_frag addr:$src1), timm:$src2)))]>,
Sched<[sched.Folded]>;
}
@ -5443,7 +5443,7 @@ let ExeDomain = SSEPackedSingle, isCodeGenOnly = 1 in {
"ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(OpcodeStr,
"ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst, (VT32 (OpNode VR128:$src1, VR128:$src2, imm:$src3)))]>,
[(set VR128:$dst, (VT32 (OpNode VR128:$src1, VR128:$src2, timm:$src3)))]>,
Sched<[sched]>;
def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
@ -5454,7 +5454,7 @@ let ExeDomain = SSEPackedSingle, isCodeGenOnly = 1 in {
!strconcat(OpcodeStr,
"ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
(OpNode VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
(OpNode VR128:$src1, sse_load_f32:$src2, timm:$src3))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedSingle, isCodeGenOnly = 1
@ -5466,7 +5466,7 @@ let ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 in {
"sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(OpcodeStr,
"sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst, (VT64 (OpNode VR128:$src1, VR128:$src2, imm:$src3)))]>,
[(set VR128:$dst, (VT64 (OpNode VR128:$src1, VR128:$src2, timm:$src3)))]>,
Sched<[sched]>;
def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
@ -5477,7 +5477,7 @@ let ExeDomain = SSEPackedDouble, isCodeGenOnly = 1 in {
!strconcat(OpcodeStr,
"sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
(OpNode VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
(OpNode VR128:$src1, sse_load_f64:$src2, timm:$src3))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
} // ExeDomain = SSEPackedDouble, isCodeGenOnly = 1
}
@ -5512,16 +5512,16 @@ let Predicates = [UseAVX] in {
}
let Predicates = [UseAVX] in {
def : Pat<(X86VRndScale FR32:$src1, imm:$src2),
def : Pat<(X86VRndScale FR32:$src1, timm:$src2),
(VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src1, imm:$src2)>;
def : Pat<(X86VRndScale FR64:$src1, imm:$src2),
def : Pat<(X86VRndScale FR64:$src1, timm:$src2),
(VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src1, imm:$src2)>;
}
let Predicates = [UseAVX, OptForSize] in {
def : Pat<(X86VRndScale (loadf32 addr:$src1), imm:$src2),
def : Pat<(X86VRndScale (loadf32 addr:$src1), timm:$src2),
(VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src1, imm:$src2)>;
def : Pat<(X86VRndScale (loadf64 addr:$src1), imm:$src2),
def : Pat<(X86VRndScale (loadf64 addr:$src1), timm:$src2),
(VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src1, imm:$src2)>;
}
@ -5539,16 +5539,16 @@ defm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl,
v4f32, v2f64, X86RndScales>;
let Predicates = [UseSSE41] in {
def : Pat<(X86VRndScale FR32:$src1, imm:$src2),
def : Pat<(X86VRndScale FR32:$src1, timm:$src2),
(ROUNDSSr FR32:$src1, imm:$src2)>;
def : Pat<(X86VRndScale FR64:$src1, imm:$src2),
def : Pat<(X86VRndScale FR64:$src1, timm:$src2),
(ROUNDSDr FR64:$src1, imm:$src2)>;
}
let Predicates = [UseSSE41, OptForSize] in {
def : Pat<(X86VRndScale (loadf32 addr:$src1), imm:$src2),
def : Pat<(X86VRndScale (loadf32 addr:$src1), timm:$src2),
(ROUNDSSm addr:$src1, imm:$src2)>;
def : Pat<(X86VRndScale (loadf64 addr:$src1), imm:$src2),
def : Pat<(X86VRndScale (loadf64 addr:$src1), timm:$src2),
(ROUNDSDm addr:$src1, imm:$src2)>;
}
@ -5830,7 +5830,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
[(set RC:$dst, (IntId RC:$src1, RC:$src2, timm:$src3))]>,
Sched<[sched]>;
def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, u8imm:$src3),
@ -5840,7 +5840,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst,
(IntId RC:$src1, (memop_frag addr:$src2), imm:$src3))]>,
(IntId RC:$src1, (memop_frag addr:$src2), timm:$src3))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@ -5857,7 +5857,7 @@ multiclass SS41I_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>,
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>,
Sched<[sched]>;
def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, u8imm:$src3),
@ -5867,7 +5867,7 @@ multiclass SS41I_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst,
(OpVT (OpNode RC:$src1, (memop_frag addr:$src2), imm:$src3)))]>,
(OpVT (OpNode RC:$src1, (memop_frag addr:$src2), timm:$src3)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@ -6012,7 +6012,7 @@ let ExeDomain = d, Constraints = !if(Is2Addr, "$src1 = $dst", "") in {
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>,
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>,
Sched<[sched]>;
def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, u8imm:$src3),
@ -6022,12 +6022,12 @@ let ExeDomain = d, Constraints = !if(Is2Addr, "$src1 = $dst", "") in {
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set RC:$dst,
(OpVT (OpNode RC:$src1, (memop_frag addr:$src2), imm:$src3)))]>,
(OpVT (OpNode RC:$src1, (memop_frag addr:$src2), timm:$src3)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
}
// Pattern to commute if load is in first source.
def : Pat<(OpVT (OpNode (memop_frag addr:$src2), RC:$src1, imm:$src3)),
def : Pat<(OpVT (OpNode (memop_frag addr:$src2), RC:$src1, timm:$src3)),
(!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2,
(commuteXForm imm:$src3))>;
}
@ -6065,36 +6065,36 @@ let Predicates = [HasAVX2] in {
// Emulate vXi32/vXi64 blends with vXf32/vXf64 or pblendw.
// ExecutionDomainFixPass will cleanup domains later on.
let Predicates = [HasAVX1Only] in {
def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), imm:$src3),
def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), timm:$src3),
(VBLENDPDYrri VR256:$src1, VR256:$src2, imm:$src3)>;
def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), imm:$src3),
def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), timm:$src3),
(VBLENDPDYrmi VR256:$src1, addr:$src2, imm:$src3)>;
def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, imm:$src3),
def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, timm:$src3),
(VBLENDPDYrmi VR256:$src1, addr:$src2, (BlendCommuteImm4 imm:$src3))>;
// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
// it from becoming movsd via commuting under optsize.
def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3),
def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3),
(VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 imm:$src3))>;
def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), imm:$src3),
def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), timm:$src3),
(VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 imm:$src3))>;
def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, imm:$src3),
def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, timm:$src3),
(VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 imm:$src3))>;
def : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), imm:$src3),
def : Pat<(X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2), timm:$src3),
(VBLENDPSYrri VR256:$src1, VR256:$src2, imm:$src3)>;
def : Pat<(X86Blendi VR256:$src1, (loadv8i32 addr:$src2), imm:$src3),
def : Pat<(X86Blendi VR256:$src1, (loadv8i32 addr:$src2), timm:$src3),
(VBLENDPSYrmi VR256:$src1, addr:$src2, imm:$src3)>;
def : Pat<(X86Blendi (loadv8i32 addr:$src2), VR256:$src1, imm:$src3),
def : Pat<(X86Blendi (loadv8i32 addr:$src2), VR256:$src1, timm:$src3),
(VBLENDPSYrmi VR256:$src1, addr:$src2, (BlendCommuteImm8 imm:$src3))>;
// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
// it from becoming movss via commuting under optsize.
def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), imm:$src3),
def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), timm:$src3),
(VPBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 imm:$src3))>;
def : Pat<(X86Blendi VR128:$src1, (loadv4i32 addr:$src2), imm:$src3),
def : Pat<(X86Blendi VR128:$src1, (loadv4i32 addr:$src2), timm:$src3),
(VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>;
def : Pat<(X86Blendi (loadv4i32 addr:$src2), VR128:$src1, imm:$src3),
def : Pat<(X86Blendi (loadv4i32 addr:$src2), VR128:$src1, timm:$src3),
(VPBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>;
}
@ -6111,18 +6111,18 @@ defm PBLENDW : SS41I_blend_rmi<0x0E, "pblendw", X86Blendi, v8i16,
let Predicates = [UseSSE41] in {
// Use pblendw for 128-bit integer to keep it in the integer domain and prevent
// it from becoming movss via commuting under optsize.
def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3),
def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3),
(PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm2 imm:$src3))>;
def : Pat<(X86Blendi VR128:$src1, (memopv2i64 addr:$src2), imm:$src3),
def : Pat<(X86Blendi VR128:$src1, (memopv2i64 addr:$src2), timm:$src3),
(PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm2 imm:$src3))>;
def : Pat<(X86Blendi (memopv2i64 addr:$src2), VR128:$src1, imm:$src3),
def : Pat<(X86Blendi (memopv2i64 addr:$src2), VR128:$src1, timm:$src3),
(PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2 imm:$src3))>;
def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), imm:$src3),
def : Pat<(X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2), timm:$src3),
(PBLENDWrri VR128:$src1, VR128:$src2, (BlendScaleImm4 imm:$src3))>;
def : Pat<(X86Blendi VR128:$src1, (memopv4i32 addr:$src2), imm:$src3),
def : Pat<(X86Blendi VR128:$src1, (memopv4i32 addr:$src2), timm:$src3),
(PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>;
def : Pat<(X86Blendi (memopv4i32 addr:$src2), VR128:$src1, imm:$src3),
def : Pat<(X86Blendi (memopv4i32 addr:$src2), VR128:$src1, timm:$src3),
(PBLENDWrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>;
}
@ -6596,7 +6596,7 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
"sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst,
(int_x86_sha1rnds4 VR128:$src1, VR128:$src2,
(i8 imm:$src3)))]>, TA,
(i8 timm:$src3)))]>, TA,
Sched<[SchedWriteVecIMul.XMM]>;
def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, u8imm:$src3),
@ -6604,7 +6604,7 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA] in {
[(set VR128:$dst,
(int_x86_sha1rnds4 VR128:$src1,
(memop addr:$src2),
(i8 imm:$src3)))]>, TA,
(i8 timm:$src3)))]>, TA,
Sched<[SchedWriteVecIMul.XMM.Folded,
SchedWriteVecIMul.XMM.ReadAfterFold]>;
@ -6722,26 +6722,26 @@ let Predicates = [HasAVX, HasAES] in {
(ins VR128:$src1, u8imm:$src2),
"vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
(int_x86_aesni_aeskeygenassist VR128:$src1, timm:$src2))]>,
Sched<[WriteAESKeyGen]>, VEX, VEX_WIG;
def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, u8imm:$src2),
"vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_aesni_aeskeygenassist (load addr:$src1), imm:$src2))]>,
(int_x86_aesni_aeskeygenassist (load addr:$src1), timm:$src2))]>,
Sched<[WriteAESKeyGen.Folded]>, VEX, VEX_WIG;
}
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, u8imm:$src2),
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
(int_x86_aesni_aeskeygenassist VR128:$src1, timm:$src2))]>,
Sched<[WriteAESKeyGen]>;
def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, u8imm:$src2),
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_aesni_aeskeygenassist (memop addr:$src1), imm:$src2))]>,
(int_x86_aesni_aeskeygenassist (memop addr:$src1), timm:$src2))]>,
Sched<[WriteAESKeyGen.Folded]>;
//===----------------------------------------------------------------------===//
@ -6762,7 +6762,7 @@ let Predicates = [NoAVX, HasPCLMUL] in {
(ins VR128:$src1, VR128:$src2, u8imm:$src3),
"pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst,
(int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>,
(int_x86_pclmulqdq VR128:$src1, VR128:$src2, timm:$src3))]>,
Sched<[WriteCLMul]>;
def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
@ -6770,12 +6770,12 @@ let Predicates = [NoAVX, HasPCLMUL] in {
"pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
[(set VR128:$dst,
(int_x86_pclmulqdq VR128:$src1, (memop addr:$src2),
imm:$src3))]>,
timm:$src3))]>,
Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;
} // Constraints = "$src1 = $dst"
def : Pat<(int_x86_pclmulqdq (memop addr:$src2), VR128:$src1,
(i8 imm:$src3)),
(i8 timm:$src3)),
(PCLMULQDQrm VR128:$src1, addr:$src2,
(PCLMULCommuteImm imm:$src3))>;
} // Predicates = [NoAVX, HasPCLMUL]
@ -6799,19 +6799,19 @@ multiclass vpclmulqdq<RegisterClass RC, X86MemOperand MemOp,
(ins RC:$src1, RC:$src2, u8imm:$src3),
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set RC:$dst,
(IntId RC:$src1, RC:$src2, imm:$src3))]>,
(IntId RC:$src1, RC:$src2, timm:$src3))]>,
Sched<[WriteCLMul]>;
def rm : PCLMULIi8<0x44, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, MemOp:$src2, u8imm:$src3),
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set RC:$dst,
(IntId RC:$src1, (LdFrag addr:$src2), imm:$src3))]>,
(IntId RC:$src1, (LdFrag addr:$src2), timm:$src3))]>,
Sched<[WriteCLMul.Folded, WriteCLMul.ReadAfterFold]>;
// We can commute a load in the first operand by swapping the sources and
// rotating the immediate.
def : Pat<(IntId (LdFrag addr:$src2), RC:$src1, (i8 imm:$src3)),
def : Pat<(IntId (LdFrag addr:$src2), RC:$src1, (i8 timm:$src3)),
(!cast<Instruction>(NAME#"rm") RC:$src1, addr:$src2,
(PCLMULCommuteImm imm:$src3))>;
}
@ -6857,8 +6857,8 @@ let Constraints = "$src = $dst" in {
def EXTRQI : Ii8<0x78, MRMXr, (outs VR128:$dst),
(ins VR128:$src, u8imm:$len, u8imm:$idx),
"extrq\t{$idx, $len, $src|$src, $len, $idx}",
[(set VR128:$dst, (X86extrqi VR128:$src, imm:$len,
imm:$idx))]>,
[(set VR128:$dst, (X86extrqi VR128:$src, timm:$len,
timm:$idx))]>,
PD, Sched<[SchedWriteVecALU.XMM]>;
def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src, VR128:$mask),
@ -6871,7 +6871,7 @@ def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src, VR128:$src2, u8imm:$len, u8imm:$idx),
"insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}",
[(set VR128:$dst, (X86insertqi VR128:$src, VR128:$src2,
imm:$len, imm:$idx))]>,
timm:$len, timm:$idx))]>,
XD, Sched<[SchedWriteVecALU.XMM]>;
def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src, VR128:$mask),
@ -7142,13 +7142,13 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (f_vt (X86VPermilpi RC:$src1, (i8 imm:$src2))))]>, VEX,
[(set RC:$dst, (f_vt (X86VPermilpi RC:$src1, (i8 timm:$src2))))]>, VEX,
Sched<[sched]>;
def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
(ins x86memop_f:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst,
(f_vt (X86VPermilpi (load addr:$src1), (i8 imm:$src2))))]>, VEX,
(f_vt (X86VPermilpi (load addr:$src1), (i8 timm:$src2))))]>, VEX,
Sched<[sched.Folded]>;
}// Predicates = [HasAVX, NoVLX]
}
@ -7180,13 +7180,13 @@ def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, u8imm:$src3),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst, (v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2,
(i8 imm:$src3))))]>, VEX_4V, VEX_L,
(i8 timm:$src3))))]>, VEX_4V, VEX_L,
Sched<[WriteFShuffle256]>;
def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, u8imm:$src3),
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4f64 addr:$src2),
(i8 imm:$src3)))]>, VEX_4V, VEX_L,
(i8 timm:$src3)))]>, VEX_4V, VEX_L,
Sched<[WriteFShuffle256.Folded, WriteFShuffle256.ReadAfterFold]>;
}
@ -7198,19 +7198,19 @@ def Perm2XCommuteImm : SDNodeXForm<imm, [{
let Predicates = [HasAVX] in {
// Pattern with load in other operand.
def : Pat<(v4f64 (X86VPerm2x128 (loadv4f64 addr:$src2),
VR256:$src1, (i8 imm:$imm))),
VR256:$src1, (i8 timm:$imm))),
(VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>;
}
let Predicates = [HasAVX1Only] in {
def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 timm:$imm))),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1,
(loadv4i64 addr:$src2), (i8 imm:$imm))),
(loadv4i64 addr:$src2), (i8 timm:$imm))),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
// Pattern with load in other operand.
def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2),
VR256:$src1, (i8 imm:$imm))),
VR256:$src1, (i8 timm:$imm))),
(VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>;
}
@ -7256,7 +7256,7 @@ multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop,
def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
(ins RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (X86cvtps2ph RC:$src1, imm:$src2))]>,
[(set VR128:$dst, (X86cvtps2ph RC:$src1, timm:$src2))]>,
TAPD, VEX, Sched<[RR]>;
let hasSideEffects = 0, mayStore = 1 in
def mr : Ii8<0x1D, MRMDestMem, (outs),
@ -7326,18 +7326,18 @@ multiclass AVX2_blend_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins RC:$src1, RC:$src2, u8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>,
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))]>,
Sched<[sched]>, VEX_4V;
def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, u8imm:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
(OpVT (OpNode RC:$src1, (load addr:$src2), imm:$src3)))]>,
(OpVT (OpNode RC:$src1, (load addr:$src2), timm:$src3)))]>,
Sched<[sched.Folded, sched.ReadAfterFold]>, VEX_4V;
// Pattern to commute if load is in first source.
def : Pat<(OpVT (OpNode (load addr:$src2), RC:$src1, imm:$src3)),
def : Pat<(OpVT (OpNode (load addr:$src2), RC:$src1, timm:$src3)),
(!cast<Instruction>(NAME#"rmi") RC:$src1, addr:$src2,
(commuteXForm imm:$src3))>;
}
@ -7350,18 +7350,18 @@ defm VPBLENDDY : AVX2_blend_rmi<0x02, "vpblendd", X86Blendi, v8i32,
SchedWriteBlend.YMM, VR256, i256mem,
BlendCommuteImm8>, VEX_L;
def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), imm:$src3),
def : Pat<(X86Blendi (v4i64 VR256:$src1), (v4i64 VR256:$src2), timm:$src3),
(VPBLENDDYrri VR256:$src1, VR256:$src2, (BlendScaleImm4 imm:$src3))>;
def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), imm:$src3),
def : Pat<(X86Blendi VR256:$src1, (loadv4i64 addr:$src2), timm:$src3),
(VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleImm4 imm:$src3))>;
def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, imm:$src3),
def : Pat<(X86Blendi (loadv4i64 addr:$src2), VR256:$src1, timm:$src3),
(VPBLENDDYrmi VR256:$src1, addr:$src2, (BlendScaleCommuteImm4 imm:$src3))>;
def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), imm:$src3),
def : Pat<(X86Blendi (v2i64 VR128:$src1), (v2i64 VR128:$src2), timm:$src3),
(VPBLENDDrri VR128:$src1, VR128:$src2, (BlendScaleImm2to4 imm:$src3))>;
def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), imm:$src3),
def : Pat<(X86Blendi VR128:$src1, (loadv2i64 addr:$src2), timm:$src3),
(VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleImm2to4 imm:$src3))>;
def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, imm:$src3),
def : Pat<(X86Blendi (loadv2i64 addr:$src2), VR128:$src1, timm:$src3),
(VPBLENDDrmi VR128:$src1, addr:$src2, (BlendScaleCommuteImm2to4 imm:$src3))>;
}
@ -7611,7 +7611,7 @@ multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>,
(OpVT (X86VPermi VR256:$src1, (i8 timm:$src2))))]>,
Sched<[Sched]>, VEX, VEX_L;
def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
(ins memOp:$src1, u8imm:$src2),
@ -7619,7 +7619,7 @@ multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(OpVT (X86VPermi (mem_frag addr:$src1),
(i8 imm:$src2))))]>,
(i8 timm:$src2))))]>,
Sched<[Sched.Folded, Sched.ReadAfterFold]>, VEX, VEX_L;
}
}
@ -7638,18 +7638,18 @@ def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, u8imm:$src3),
"vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst, (v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2,
(i8 imm:$src3))))]>, Sched<[WriteShuffle256]>,
(i8 timm:$src3))))]>, Sched<[WriteShuffle256]>,
VEX_4V, VEX_L;
def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, u8imm:$src3),
"vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4i64 addr:$src2),
(i8 imm:$src3)))]>,
(i8 timm:$src3)))]>,
Sched<[WriteShuffle256.Folded, WriteShuffle256.ReadAfterFold]>, VEX_4V, VEX_L;
let Predicates = [HasAVX2] in
def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2),
VR256:$src1, (i8 imm:$imm))),
VR256:$src1, (i8 timm:$imm))),
(VPERM2I128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>;
@ -7931,13 +7931,13 @@ multiclass GF2P8AFFINE_rmi<bits<8> Op, string OpStr, ValueType OpVT,
OpStr##"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}") in {
def rri : Ii8<Op, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, u8imm:$src3), "",
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))],
[(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, timm:$src3)))],
SSEPackedInt>, Sched<[SchedWriteVecALU.XMM]>;
def rmi : Ii8<Op, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, X86MemOp:$src2, u8imm:$src3), "",
[(set RC:$dst, (OpVT (OpNode RC:$src1,
(MemOpFrag addr:$src2),
imm:$src3)))], SSEPackedInt>,
timm:$src3)))], SSEPackedInt>,
Sched<[SchedWriteVecALU.XMM.Folded, SchedWriteVecALU.XMM.ReadAfterFold]>;
}
}

View File

@ -43,7 +43,7 @@ def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", [(int_x86_int (i8 3))]>;
let SchedRW = [WriteSystem] in {
def INT : Ii8<0xcd, RawFrm, (outs), (ins u8imm:$trap), "int\t$trap",
[(int_x86_int imm:$trap)]>;
[(int_x86_int timm:$trap)]>;
def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB;

View File

@ -45,7 +45,7 @@ def XTEST : I<0x01, MRM_D6, (outs), (ins),
def XABORT : Ii8<0xc6, MRM_F8, (outs), (ins i8imm:$imm),
"xabort\t$imm",
[(int_x86_xabort imm:$imm)]>, Requires<[HasRTM]>;
[(int_x86_xabort timm:$imm)]>, Requires<[HasRTM]>;
} // SchedRW
// HLE prefixes

View File

@ -143,13 +143,13 @@ multiclass xop3opimm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), imm:$src2)))]>,
(vt128 (OpNode (vt128 VR128:$src1), timm:$src2)))]>,
XOP, Sched<[sched]>;
def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, u8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 (load addr:$src1)), imm:$src2)))]>,
(vt128 (OpNode (vt128 (load addr:$src1)), timm:$src2)))]>,
XOP, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
@ -251,7 +251,7 @@ multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128,
"\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
imm:$cc)))]>,
timm:$cc)))]>,
XOP_4V, Sched<[sched]>;
def mi : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, u8imm:$cc),
@ -260,12 +260,12 @@ multiclass xopvpcom<bits<8> opc, string Suffix, SDNode OpNode, ValueType vt128,
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1),
(vt128 (load addr:$src2)),
imm:$cc)))]>,
timm:$cc)))]>,
XOP_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
def : Pat<(OpNode (load addr:$src2),
(vt128 VR128:$src1), imm:$cc),
(vt128 VR128:$src1), timm:$cc),
(!cast<Instruction>(NAME#"mi") VR128:$src1, addr:$src2,
(CommuteVPCOMCC imm:$cc))>;
}
@ -422,7 +422,7 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set RC:$dst,
(VT (X86vpermil2 RC:$src1, RC:$src2, RC:$src3, (i8 imm:$src4))))]>,
(VT (X86vpermil2 RC:$src1, RC:$src2, RC:$src3, (i8 timm:$src4))))]>,
Sched<[sched]>;
def rm : IXOP5<Opc, MRMSrcMemOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, intmemop:$src3, u4imm:$src4),
@ -430,7 +430,7 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set RC:$dst,
(VT (X86vpermil2 RC:$src1, RC:$src2, (IntLdFrag addr:$src3),
(i8 imm:$src4))))]>, VEX_W,
(i8 timm:$src4))))]>, VEX_W,
Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
def mr : IXOP5<Opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, fpmemop:$src2, RC:$src3, u4imm:$src4),
@ -438,7 +438,7 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set RC:$dst,
(VT (X86vpermil2 RC:$src1, (FPLdFrag addr:$src2),
RC:$src3, (i8 imm:$src4))))]>,
RC:$src3, (i8 timm:$src4))))]>,
Sched<[sched.Folded, sched.ReadAfterFold,
// fpmemop:$src2
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,

View File

@ -389,8 +389,7 @@ define void @store(i64* %addr, i64 addrspace(42)* %addr42, i64 %val1, i64 %val2)
; CHECK-LABEL: name: intrinsics
; CHECK: [[CUR:%[0-9]+]]:_(s32) = COPY $w0
; CHECK: [[BITS:%[0-9]+]]:_(s32) = COPY $w1
; CHECK: [[CREG:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[PTR:%[0-9]+]]:_(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), [[CREG]]
; CHECK: [[PTR:%[0-9]+]]:_(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
; CHECK: [[PTR_VEC:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.ptr.vec
; CHECK: [[VEC:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_VEC]]
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.neon.st2), [[VEC]](<8 x s8>), [[VEC]](<8 x s8>), [[PTR]](p0)

View File

@ -10,24 +10,20 @@ body: |
bb.0:
liveins: $vgpr0
%0:vgpr(s32) = COPY $vgpr0
%1:sgpr(s32) = G_CONSTANT i32 1
%2:sgpr(s32) = G_CONSTANT i32 15
%3:sgpr(s1) = G_CONSTANT i1 0
%4:sgpr(s1) = G_CONSTANT i1 1
; CHECK: EXP 1, %0, %0, %0, %0, 0, 0, 15, implicit $exec
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %1:sgpr(s32), %2:sgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %3:sgpr(s1), %3:sgpr(s1)
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp),1, 15, %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), 0, 0
; CHECK: EXP_DONE 1, %0, %0, %0, %0, 0, 0, 15, implicit $exec
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %1:sgpr(s32), %2:sgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %4:sgpr(s1), %3:sgpr(s1)
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 1, 15, %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), %0:vgpr(s32), 1, 0
%5:vgpr(<2 x s16>) = G_BITCAST %0(s32)
; CHECK: [[UNDEF0:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK: EXP 1, %0, %0, [[UNDEF0]], [[UNDEF0]], 0, 1, 15, implicit $exec
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), %1:sgpr(s32), %2:sgpr(s32), %5:vgpr(<2 x s16>), %5:vgpr(<2 x s16>), %3:sgpr(s1), %3:sgpr(s1)
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), 1, 15, %5:vgpr(<2 x s16>), %5:vgpr(<2 x s16>), 0, 0
; CHECK: [[UNDEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK: EXP_DONE 1, %0, %0, [[UNDEF1]], [[UNDEF1]], 0, 1, 15, implicit $exec
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), %1:sgpr(s32), %2:sgpr(s32), %5:vgpr(<2 x s16>), %5:vgpr(<2 x s16>), %4:sgpr(s1), %3:sgpr(s1)
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.compr), 1, 15, %5:vgpr(<2 x s16>), %5:vgpr(<2 x s16>), 1, 0

View File

@ -18,8 +18,7 @@ body: |
; GCN: S_SENDMSG 1, implicit $exec, implicit $m0
; GCN: S_ENDPGM 0
%0:sgpr(s32) = COPY $sgpr0
%2:sgpr(s32) = G_CONSTANT i32 1
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), %2(s32), %0(s32)
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 1, %0(s32)
S_ENDPGM 0
...

View File

@ -0,0 +1,15 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -march=amdgcn -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - | FileCheck %s
declare void @llvm.amdgcn.s.sendmsg(i32 immarg, i32)
define amdgpu_ps void @test_sendmsg(i32 inreg %m0) {
; CHECK-LABEL: name: test_sendmsg
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr0
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.sendmsg), 12, [[COPY]](s32)
; CHECK: S_ENDPGM
call void @llvm.amdgcn.s.sendmsg(i32 12, i32 %m0)
ret void
}

View File

@ -9,10 +9,7 @@ define amdgpu_ps void @disabled_input(float inreg %arg0, float %psinput0, float
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
; CHECK: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), [[C]](s32), [[C1]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), [[C2]](s1), [[C2]](s1)
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), 0, 0
; CHECK: S_ENDPGM 0
main_body:
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) #0
@ -27,17 +24,14 @@ define amdgpu_ps void @disabled_input_struct(float inreg %arg0, { float, float }
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
; CHECK: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), [[C]](s32), [[C1]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), [[C2]](s1), [[C2]](s1)
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), 0, 0
; CHECK: S_ENDPGM 0
main_body:
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) #0
ret void
}
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #0
attributes #0 = { nounwind }
attributes #1 = { "InitialPSInputAddr"="0x00002" }

View File

@ -1,9 +1,8 @@
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=irtranslator -global-isel %s -o - | FileCheck %s
; CHECK-LABEL: name: test_f32_inreg
; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY $sgpr2
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[S0]]
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[S0]]
define amdgpu_vs void @test_f32_inreg(float inreg %arg0) {
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0
ret void
@ -11,7 +10,7 @@ define amdgpu_vs void @test_f32_inreg(float inreg %arg0) {
; CHECK-LABEL: name: test_f32
; CHECK: [[V0:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[V0]]
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[V0]]
define amdgpu_vs void @test_f32(float %arg0) {
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0
ret void
@ -33,7 +32,7 @@ define amdgpu_vs void @test_ptr2_inreg(i32 addrspace(4)* inreg %arg0) {
; CHECK: [[S4:%[0-9]+]]:_(s32) = COPY $sgpr4
; CHECK: [[S34:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[S3]](s32), [[S4]](s32)
; CHECK: G_LOAD [[S34]]
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[S2]]
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[S2]](s32)
define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, i32 addrspace(4)* inreg %arg1) {
%tmp0 = load volatile i32, i32 addrspace(4)* %arg1
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0
@ -45,7 +44,7 @@ define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, i32 addrspace(4)*
; CHECK: [[S1:%[0-9]+]]:_(s32) = COPY $sgpr3
; CHECK: [[V0:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[V1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[V0]](s32), [[S0]](s32), [[V1]](s32), [[S1]](s32)
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[V0]](s32), [[S0]](s32), [[V1]](s32), [[S1]](s32)
define amdgpu_vs void @test_order(float inreg %arg0, float inreg %arg1, float %arg2, float %arg3) {
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg2, float %arg0, float %arg3, float %arg1, i1 false, i1 false) #0
ret void

View File

@ -9,14 +9,13 @@ define amdgpu_ps void @test_div_scale(float %arg0, float %arg1) {
; CHECK: liveins: $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), [[C]](s1)
; CHECK: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), -1
; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[INT1]](s1)
; CHECK: G_STORE [[INT]](s32), [[DEF]](p1) :: (store 4 into `float addrspace(1)* undef`, addrspace 1)
; CHECK: G_STORE [[SEXT]](s32), [[DEF1]](p1) :: (store 4 into `i32 addrspace(1)* undef`, addrspace 1)
; CHECK: S_ENDPGM
; CHECK: S_ENDPGM 0
%call = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %arg0, float %arg1, i1 true)
%extract0 = extractvalue { float, i1 } %call, 0
%extract1 = extractvalue { float, i1 } %call, 1

View File

@ -0,0 +1,519 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=instruction-select -o - %s | FileCheck -check-prefix=UNPACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -o - %s | FileCheck -check-prefix=PACKED %s
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16(<4 x i32> inreg %rsrc, half %val, i32 %voffset, i32 inreg %soffset) {
; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16
; PACKED: bb.1 (%ir-block.0):
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f16(<4 x i32> inreg %rsrc, half %val, i32 inreg %soffset) {
; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f16
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7 + 4095, align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f16
; PACKED: bb.1 (%ir-block.0):
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7 + 4095, align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16
; PACKED: bb.1 (%ir-block.0):
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16(<4 x i32> inreg %rsrc, <4 x half> %val, i32 %voffset, i32 inreg %soffset) {
; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; UNPACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec
; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY5]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16
; PACKED: bb.1 (%ir-block.0):
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
; Make sure unpack code is emitted outside of loop
define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16(<4 x i32> %rsrc, <4 x half> %val, i32 %voffset, i32 inreg %soffset) {
; UNPACKED-LABEL: name: raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: successors: %bb.2(0x80000000)
; UNPACKED: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
; UNPACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; UNPACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
; UNPACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec
; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY5]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3
; UNPACKED: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
; UNPACKED: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
; UNPACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; UNPACKED: bb.2:
; UNPACKED: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; UNPACKED: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec
; UNPACKED: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec
; UNPACKED: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; UNPACKED: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec
; UNPACKED: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec
; UNPACKED: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec
; UNPACKED: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec
; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; UNPACKED: bb.3:
; UNPACKED: successors: %bb.4(0x80000000)
; UNPACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; UNPACKED: bb.4:
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16
; PACKED: bb.1 (%ir-block.0):
; PACKED: successors: %bb.2(0x80000000)
; PACKED: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
; PACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; PACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; PACKED: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
; PACKED: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
; PACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; PACKED: bb.2:
; PACKED: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; PACKED: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec
; PACKED: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec
; PACKED: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; PACKED: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; PACKED: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec
; PACKED: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec
; PACKED: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec
; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; PACKED: bb.3:
; PACKED: successors: %bb.4(0x80000000)
; PACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; PACKED: bb.4:
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
; UNPACKED: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY6]], [[COPY4]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095
; PACKED: bb.1 (%ir-block.0):
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; UNPACKED: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY6]], [[COPY4]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096
; PACKED: bb.1 (%ir-block.0):
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; PACKED: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_16(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_16
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: %11:vgpr_32, dead %21:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_16
; PACKED: bb.1 (%ir-block.0):
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; PACKED: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; PACKED: S_ENDPGM 0
%voffset.add = add i32 %voffset, 16
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4095(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4095
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4095
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: %11:vgpr_32, dead %22:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; UNPACKED: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4095
; PACKED: bb.1 (%ir-block.0):
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4095
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; PACKED: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; PACKED: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4095
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4096(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
; UNPACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4096
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; UNPACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; UNPACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; UNPACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; UNPACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: %11:vgpr_32, dead %22:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; UNPACKED: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4096
; PACKED: bb.1 (%ir-block.0):
; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; PACKED: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; PACKED: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; PACKED: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; PACKED: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; PACKED: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; PACKED: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4096
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
ret void
}
; Check what happens with offset add inside a waterfall loop
define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16_add_4096(<4 x i32> %rsrc, <4 x half> %val, i32 %voffset, i32 inreg %soffset) {
; UNPACKED-LABEL: name: raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16_add_4096
; UNPACKED: bb.1 (%ir-block.0):
; UNPACKED: successors: %bb.2(0x80000000)
; UNPACKED: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
; UNPACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; UNPACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; UNPACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; UNPACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
; UNPACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; UNPACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; UNPACKED: %13:vgpr_32, dead %47:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec
; UNPACKED: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY4]], implicit $exec
; UNPACKED: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
; UNPACKED: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY10]], [[COPY5]], implicit $exec
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3
; UNPACKED: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
; UNPACKED: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
; UNPACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; UNPACKED: bb.2:
; UNPACKED: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; UNPACKED: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec
; UNPACKED: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec
; UNPACKED: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; UNPACKED: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY11]], implicit $exec
; UNPACKED: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec
; UNPACKED: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec
; UNPACKED: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY12]], implicit $exec
; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; UNPACKED: bb.3:
; UNPACKED: successors: %bb.4(0x80000000)
; UNPACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; UNPACKED: bb.4:
; UNPACKED: S_ENDPGM 0
; PACKED-LABEL: name: raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16_add_4096
; PACKED: bb.1 (%ir-block.0):
; PACKED: successors: %bb.2(0x80000000)
; PACKED: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
; PACKED: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; PACKED: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; PACKED: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; PACKED: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
; PACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096
; PACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; PACKED: %13:vgpr_32, dead %31:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec
; PACKED: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
; PACKED: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
; PACKED: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; PACKED: bb.2:
; PACKED: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; PACKED: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec
; PACKED: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec
; PACKED: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; PACKED: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec
; PACKED: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec
; PACKED: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec
; PACKED: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY10]], implicit $exec
; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; PACKED: bb.3:
; PACKED: successors: %bb.4(0x80000000)
; PACKED: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; PACKED: bb.4:
; PACKED: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4096
call void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
ret void
}
declare void @llvm.amdgcn.raw.buffer.store.format.f16(half, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half>, <4 x i32>, i32, i32, i32 immarg)

View File

@ -0,0 +1,314 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=instruction-select -o - %s | FileCheck %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=instruction-select -o - %s | FileCheck %s
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f32(<4 x i32> inreg %rsrc, float %val, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f32
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4095, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v3f32(<4 x i32> inreg %rsrc, <3 x float> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v3f32
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2
; CHECK: BUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32(<4 x i32> inreg %rsrc, <4 x float> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32(<4 x i32> %rsrc, <4 x float> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr7
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr8
; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr2
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
; CHECK: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
; CHECK: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub0, implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY10]].sub1, implicit $exec
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY10]], implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_soffset4095(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_soffset4095
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_soffset4096(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_soffset4096
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_16(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_16
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %13:vgpr_32, dead %15:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 16
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_4095(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_4095
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4095
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %13:vgpr_32, dead %15:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4095
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_4096(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32_voffset_add_4096
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %13:vgpr_32, dead %15:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4096
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
ret void
}
; Check what happens with offset add inside a waterfall loop
define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32_add_4096(<4 x i32> %rsrc, <4 x float> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32_add_4096
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr7
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr8
; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr2
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096
; CHECK: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %15:vgpr_32, dead %33:sreg_64_xexec = V_ADD_I32_e64 [[COPY8]], [[COPY10]], 0, implicit $exec
; CHECK: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
; CHECK: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub0, implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY11]].sub1, implicit $exec
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY11]], implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub0, implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY12]].sub1, implicit $exec
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY12]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %15, [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4096
call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
ret void
}
declare void @llvm.amdgcn.raw.buffer.store.format.f32(float, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float>, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.format.v3f32(<3 x float>, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32 immarg)

View File

@ -0,0 +1,791 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s
; FIXME: Test with SI when argument lowering not broken for f16
; Natural mapping
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
; Copies for VGPR arguments
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, float inreg %val, i32 inreg %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr6
; CHECK: [[COPY5:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr7
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr8
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY7]], [[COPY8]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
; Waterfall for rsrc
define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset(<4 x i32> %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
; CHECK: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
; Waterfall for soffset
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY6]], implicit $exec
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
; Waterfall for rsrc and soffset
define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset(<4 x i32> %rsrc, float %val, i32 %voffset, i32 %soffset) {
; CHECK-LABEL: name: raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
; CHECK: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY7]], implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 1)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_slc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_slc
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_slc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_slc
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 3)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_dlc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_dlc
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 4)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_slc_dlc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_slc_dlc
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 1, 0, 1, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 6)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_dlc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_dlc
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 1, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 5)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_slc_dlc(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_glc_slc_dlc
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, 0, 1, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 7)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32(<4 x i32> inreg %rsrc, <2 x float> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f32
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v3f32(<4 x i32> inreg %rsrc, <3 x float> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v3f32
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2
; CHECK: BUFFER_STORE_DWORDX3_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32(<4 x i32> inreg %rsrc, <4 x float> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f32
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORDX4_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_i8(<4 x i32> inreg %rsrc, i32 %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_i8
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: BUFFER_STORE_BYTE_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom TargetCustom7, addrspace 4)
; CHECK: S_ENDPGM 0
%val.trunc = trunc i32 %val to i8
call void @llvm.amdgcn.raw.buffer.store.i8(i8 %val.trunc, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_i16(<4 x i32> inreg %rsrc, i32 %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_i16
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%val.trunc = trunc i32 %val to i16
call void @llvm.amdgcn.raw.buffer.store.i16(i16 %val.trunc, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16(<4 x i32> inreg %rsrc, half %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16(<4 x i32> inreg %rsrc, <4 x half> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16(<4 x i32> %rsrc, <4 x half> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr6
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr2
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
; CHECK: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
; CHECK: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_voffset4095(<4 x i32> inreg %rsrc, float %val, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_voffset4095
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4095, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_voffset4096(<4 x i32> inreg %rsrc, float %val, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_voffset4096
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4096, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 4096, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_16(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_16
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 16
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_4095(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_4095
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4095
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4095
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_4096(<4 x i32> inreg %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f32_voffset_add_4096
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4096
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_16(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_16
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 16
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_4095(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_4095
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4095
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4095
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
ret void
}
define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_4096(<4 x i32> inreg %rsrc, <2 x half> %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset_add_4096
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sreg_32_xm0 = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 4096
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
ret void
}
; An add of the offset is necessary, with a waterfall loop. Make sure the add is done outside of the waterfall loop.
define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_offset_add_5000(<4 x i32> %rsrc, float %val, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_offset_add_5000
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr5
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr2
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 5000
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; CHECK: %11:vgpr_32, dead %29:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
; CHECK: [[COPY8:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
; CHECK: [[COPY9:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub0, implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY8]].sub1, implicit $exec
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY8]], implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub0, implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY9]].sub1, implicit $exec
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: S_ENDPGM 0
%voffset.add = add i32 %voffset, 5000
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
ret void
}
; An add of the offset is necessary, with a waterfall loop. Make sure the add is done outside of the waterfall loop.
define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__5000_voffset__sgpr_soffset_offset(<4 x i32> %rsrc, float %val, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_store__vgpr_rsrc__vgpr_val__5000_voffset__sgpr_soffset_offset
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr2
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
; CHECK: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
; CHECK: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec
; CHECK: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 5000, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 5000, i32 %soffset, i32 0)
ret void
}
declare void @llvm.amdgcn.raw.buffer.store.i8(i8, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.i16(i16, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.f16(half, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float>, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32 immarg)

View File

@ -0,0 +1,45 @@
; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
declare void @llvm.amdgcn.s.sleep(i32) #0
; GCN-LABEL: {{^}}test_s_sleep:
; GCN: s_sleep 0{{$}}
; GCN: s_sleep 1{{$}}
; GCN: s_sleep 2{{$}}
; GCN: s_sleep 3{{$}}
; GCN: s_sleep 4{{$}}
; GCN: s_sleep 5{{$}}
; GCN: s_sleep 6{{$}}
; GCN: s_sleep 7{{$}}
; GCN: s_sleep 8{{$}}
; GCN: s_sleep 9{{$}}
; GCN: s_sleep 10{{$}}
; GCN: s_sleep 11{{$}}
; GCN: s_sleep 12{{$}}
; GCN: s_sleep 13{{$}}
; GCN: s_sleep 14{{$}}
; GCN: s_sleep 15{{$}}
define amdgpu_kernel void @test_s_sleep(i32 %x) #0 {
call void @llvm.amdgcn.s.sleep(i32 0)
call void @llvm.amdgcn.s.sleep(i32 1)
call void @llvm.amdgcn.s.sleep(i32 2)
call void @llvm.amdgcn.s.sleep(i32 3)
call void @llvm.amdgcn.s.sleep(i32 4)
call void @llvm.amdgcn.s.sleep(i32 5)
call void @llvm.amdgcn.s.sleep(i32 6)
call void @llvm.amdgcn.s.sleep(i32 7)
; Values that might only work on VI
call void @llvm.amdgcn.s.sleep(i32 8)
call void @llvm.amdgcn.s.sleep(i32 9)
call void @llvm.amdgcn.s.sleep(i32 10)
call void @llvm.amdgcn.s.sleep(i32 11)
call void @llvm.amdgcn.s.sleep(i32 12)
call void @llvm.amdgcn.s.sleep(i32 13)
call void @llvm.amdgcn.s.sleep(i32 14)
call void @llvm.amdgcn.s.sleep(i32 15)
ret void
}
attributes #0 = { nounwind }

View File

@ -23,28 +23,20 @@ body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
; CHECK-LABEL: name: exp_s
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; CHECK: [[C2:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 false
; CHECK: [[C3:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 false
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY3]](s32)
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), [[C]](s32), [[C1]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[C2]](s1), [[C3]](s1)
%0:_(s32) = G_CONSTANT i32 0
%1:_(s32) = G_CONSTANT i32 0
%2:_(s32) = COPY $sgpr0
%3:_(s32) = COPY $sgpr1
%4:_(s32) = COPY $sgpr2
%5:_(s32) = COPY $sgpr3
%6:_(s1) = G_CONSTANT i1 0
%7:_(s1) = G_CONSTANT i1 0
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.f32), %0, %1, %2, %3, %4, %5, %6, %7
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 0, [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), 0, 0
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $sgpr2
%3:_(s32) = COPY $sgpr3
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.f32), 0, 0, %0, %1, %2, %3, 0, 0
...
---
name: exp_v
@ -54,22 +46,14 @@ body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
; CHECK-LABEL: name: exp_v
; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
; CHECK: [[C2:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 false
; CHECK: [[C3:%[0-9]+]]:sgpr(s1) = G_CONSTANT i1 false
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), [[C]](s32), [[C1]](s32), [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[C2]](s1), [[C3]](s1)
%0:_(s32) = G_CONSTANT i32 0
%1:_(s32) = G_CONSTANT i32 0
%2:_(s32) = COPY $vgpr0
%3:_(s32) = COPY $vgpr1
%4:_(s32) = COPY $vgpr2
%5:_(s32) = COPY $vgpr3
%6:_(s1) = G_CONSTANT i1 0
%7:_(s1) = G_CONSTANT i1 0
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.f32), %0, %1, %2, %3, %4, %5, %6, %7
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 0, [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), 0, 0
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
%3:_(s32) = COPY $vgpr3
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.f32), 0, 0, %0, %1, %2, %3, 0, 0
...

View File

@ -0,0 +1,21 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs %s -o - | FileCheck %s
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs %s -o - | FileCheck %s
---
name: ds_swizzle_s
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0
; CHECK-LABEL: name: ds_swizzle_s
; CHECK: liveins: $sgpr0
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.swizzle), [[COPY1]](s32), 0
%0:_(s32) = COPY $sgpr0
%1:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ds.swizzle), %0, 0
...

View File

@ -0,0 +1,181 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s
; Natural mapping
define amdgpu_ps void @load_1d_vgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32 %s) {
; CHECK-LABEL: name: load_1d_vgpr_vaddr__sgpr_srsrc
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
; CHECK: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY9]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
; CHECK: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
store <4 x float> %v, <4 x float> addrspace(1)* undef
ret void
}
; Copy needed for VGPR argument
define amdgpu_ps void @load_1d_sgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32 inreg %s) {
; CHECK-LABEL: name: load_1d_sgpr_vaddr__sgpr_srsrc
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
; CHECK: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
; CHECK: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY10]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
; CHECK: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
store <4 x float> %v, <4 x float> addrspace(1)* undef
ret void
}
; Waterfall loop needed for rsrc
define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) {
; CHECK-LABEL: name: load_1d_vgpr_vaddr__vgpr_srsrc
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7
; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
; CHECK: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
; CHECK: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF2]], %bb.1, %19, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %12(<4 x s32>), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec
; CHECK: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec
; CHECK: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
; CHECK: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY9]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
; CHECK: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
store <4 x float> %v, <4 x float> addrspace(1)* undef
ret void
}
; Waterfall loop needed for rsrc, copy needed for vaddr
define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg %s) {
; CHECK-LABEL: name: load_1d_sgpr_vaddr__vgpr_srsrc
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7
; CHECK: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32)
; CHECK: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
; CHECK: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF2]], %bb.1, %20, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %12(<4 x s32>), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec
; CHECK: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec
; CHECK: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
; CHECK: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR1]](<8 x s32>), 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY10]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
; CHECK: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
store <4 x float> %v, <4 x float> addrspace(1)* undef
ret void
}
declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
attributes #0 = { nounwind readonly }

View File

@ -0,0 +1,268 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s
; Natural mapping
define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
; CHECK-LABEL: name: sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $vgpr0
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
; CHECK: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10
; CHECK: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11
; CHECK: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12
; CHECK: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13
; CHECK: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
; CHECK: [[COPY13:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY13]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
; CHECK: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
store <4 x float> %v, <4 x float> addrspace(1)* undef
ret void
}
; Copy required for VGPR input
define amdgpu_ps void @sample_1d_sgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float inreg %s) {
; CHECK-LABEL: name: sample_1d_sgpr_vaddr__sgpr_rsrc__sgpr_samp
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
; CHECK: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr10
; CHECK: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr11
; CHECK: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr12
; CHECK: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr13
; CHECK: [[COPY12:%[0-9]+]]:sgpr(s32) = COPY $sgpr14
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
; CHECK: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[COPY12]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
; CHECK: [[COPY14:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY14]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
; CHECK: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
store <4 x float> %v, <4 x float> addrspace(1)* undef
ret void
}
; Waterfall loop for rsrc
define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsrc, <4 x i32> inreg %samp, float %s) {
; CHECK-LABEL: name: sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7
; CHECK: [[COPY8:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY9:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; CHECK: [[COPY10:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY11:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr8
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
; CHECK: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
; CHECK: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF2]], %bb.1, %24, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec
; CHECK: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec
; CHECK: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
; CHECK: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_2]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: [[COPY13:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY13]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
; CHECK: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
store <4 x float> %v, <4 x float> addrspace(1)* undef
ret void
}
; Waterfall loop for sampler
define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inreg %rsrc, <4 x i32> %samp, float %s) {
; CHECK-LABEL: name: sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
; CHECK: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
; CHECK: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
; CHECK: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>)
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF2]], %bb.1, %24, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR2]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: [[COPY13:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY13]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
; CHECK: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
store <4 x float> %v, <4 x float> addrspace(1)* undef
ret void
}
; Waterfall loop for rsrc and sampler
define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsrc, <4 x i32> %samp, float %s) {
; CHECK-LABEL: name: sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY $vgpr7
; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY $vgpr8
; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY $vgpr9
; CHECK: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY $vgpr10
; CHECK: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY $vgpr11
; CHECK: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr12
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF
; CHECK: [[DEF1:%[0-9]+]]:vgpr(<4 x s32>) = G_IMPLICIT_DEF
; CHECK: [[DEF2:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64), [[UV2:%[0-9]+]]:vreg_64(s64), [[UV3:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s32>)
; CHECK: [[UV4:%[0-9]+]]:vreg_64(s64), [[UV5:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s32>)
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF2]], %bb.1, %24, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(<4 x s32>) = G_PHI [[DEF1]](<4 x s32>), %bb.1, %17(<4 x s32>), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV2]].sub1(s64), implicit $exec
; CHECK: [[MV2:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV2]](s64), [[UV2]](s64), implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_2]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV3]].sub1(s64), implicit $exec
; CHECK: [[MV3:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV3]](s64), [[UV3]](s64), implicit $exec
; CHECK: [[S_AND_B64_2:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_3]], [[S_AND_B64_1]], implicit-def $scc
; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:sgpr(<8 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32), [[V_READFIRSTLANE_B32_4]](s32), [[V_READFIRSTLANE_B32_5]](s32), [[V_READFIRSTLANE_B32_6]](s32), [[V_READFIRSTLANE_B32_7]](s32)
; CHECK: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV4]].sub1(s64), implicit $exec
; CHECK: [[MV4:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_4:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV4]](s64), [[UV4]](s64), implicit $exec
; CHECK: [[S_AND_B64_3:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_4]], [[S_AND_B64_2]], implicit-def $scc
; CHECK: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV5]].sub1(s64), implicit $exec
; CHECK: [[MV5:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_5:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV5]](s64), [[UV5]](s64), implicit $exec
; CHECK: [[S_AND_B64_4:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_5]], [[S_AND_B64_3]], implicit-def $scc
; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_8]](s32), [[V_READFIRSTLANE_B32_9]](s32), [[V_READFIRSTLANE_B32_10]](s32), [[V_READFIRSTLANE_B32_11]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(<4 x s32>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR2]](<8 x s32>), [[BUILD_VECTOR3]](<4 x s32>), 0, 0, 0 :: (dereferenceable load 16 from custom TargetCustom8)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_4]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: [[COPY13:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1)
; CHECK: G_STORE [[INT]](<4 x s32>), [[COPY13]](p1) :: (store 16 into `<4 x float> addrspace(1)* undef`, addrspace 1)
; CHECK: S_ENDPGM 0
%v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
store <4 x float> %v, <4 x float> addrspace(1)* undef
ret void
}
declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 immarg, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
attributes #0 = { nounwind readonly }

View File

@ -0,0 +1,173 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s
; Natural mapping
define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[INT]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret float %val
}
; Copies for VGPR arguments
define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 inreg %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_load__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY6]](s32), [[COPY5]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[INT]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret float %val
}
; Waterfall for rsrc
define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset(<4 x i32> %rsrc, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.1, %15, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %9(s32), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: $vgpr0 = COPY [[INT]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret float %val
}
; Waterfall for soffset
define amdgpu_ps float @raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset(<4 x i32> inreg %rsrc, i32 %voffset, i32 %soffset) {
; CHECK-LABEL: name: raw_buffer_load__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr1
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.1, %15, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %9(s32), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY5]](s32), implicit $exec
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[V_READFIRSTLANE_B32_]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: $vgpr0 = COPY [[INT]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret float %val
}
; Waterfall for rsrc and soffset
define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset(<4 x i32> %rsrc, i32 %voffset, i32 %soffset) {
; CHECK-LABEL: name: raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr5
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.1, %15, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %9(s32), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY5]](s32), implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY5]](s32), implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[V_READFIRSTLANE_B32_4]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: $vgpr0 = COPY [[INT]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
ret float %val
}
declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32 immarg)

View File

@ -0,0 +1,179 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s
; Natural mapping
define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[INT]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret float %val
}
; Copies for VGPR arguments
define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__sgpr_val__sgpr_vindex__sgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, i32 inreg %vindex, i32 inreg %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: struct_buffer_load__sgpr_rsrc__sgpr_val__sgpr_vindex__sgpr_voffset__sgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32)
; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY7]](s32), [[COPY8]](s32), [[COPY6]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
; CHECK: $vgpr0 = COPY [[INT]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret float %val
}
; Waterfall for rsrc
define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.1, %16, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %10(s32), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: $vgpr0 = COPY [[INT]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret float %val
}
; Waterfall for soffset
define amdgpu_ps float @struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex_vgpr_voffset__vgpr_soffset(<4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 %soffset) {
; CHECK-LABEL: name: struct_buffer_load__sgpr_rsrc__vgpr_val__vgpr_vindex_vgpr_voffset__vgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr2
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.1, %16, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %10(s32), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY6]](s32), implicit $exec
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[V_READFIRSTLANE_B32_]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: $vgpr0 = COPY [[INT]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret float %val
}
; Waterfall for rsrc and soffset
define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__vgpr_soffset(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset) {
; CHECK-LABEL: name: struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__vgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
; CHECK: [[COPY6:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr6
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.1, %16, %bb.2
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.1, %10(s32), %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY6]](s32), implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY6]](s32), implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.load), [[BUILD_VECTOR1]](<4 x s32>), [[COPY4]](s32), [[COPY5]](s32), [[V_READFIRSTLANE_B32_4]](s32), 0 :: (dereferenceable load 4 from custom TargetCustom7, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: $vgpr0 = COPY [[INT]](s32)
; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
%val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret float %val
}
declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32 immarg)

View File

@ -0,0 +1,174 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-fast -o - %s | FileCheck %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -stop-after=regbankselect -regbankselect-greedy -o - %s | FileCheck %s
; Natural mapping
define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, float %val, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
}
; Copies for VGPR arguments
define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__sgpr_val__sgpr_vindex__sgpr_voffset__sgpr_soffset(<4 x i32> inreg %rsrc, float inreg %val, i32 inreg %vindex, i32 inreg %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: struct_buffer_store__sgpr_rsrc__sgpr_val__sgpr_vindex__sgpr_voffset__sgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6
; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr7
; CHECK: [[COPY6:%[0-9]+]]:sgpr(s32) = COPY $sgpr8
; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr9
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32)
; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32)
; CHECK: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[COPY6]](s32)
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY8]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY9]](s32), [[COPY10]](s32), [[COPY7]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
}
; Waterfall for rsrc
define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset(<4 x i32> %rsrc, float %val, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; CHECK-LABEL: name: struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__sgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
; CHECK: [[COPY7:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF]], %bb.1, %14, %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
}
; Waterfall for soffset
define amdgpu_ps void @struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__vgpr_soffset(<4 x i32> inreg %rsrc, float %val, i32 %vindex, i32 %voffset, i32 %soffset) {
; CHECK-LABEL: name: struct_buffer_store__sgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__vgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr3
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF]], %bb.1, %14, %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY7]](s32), implicit $exec
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[V_READFIRSTLANE_B32_]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
}
; Waterfall for rsrc and soffset
define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__vgpr_soffset(<4 x i32> %rsrc, float %val, i32 %vindex, i32 %voffset, i32 %soffset) {
; CHECK-LABEL: name: struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vgpr_voffset__vgpr_soffset
; CHECK: bb.1 (%ir-block.0):
; CHECK: successors: %bb.2(0x80000000)
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY $vgpr5
; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY $vgpr6
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr7
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>)
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
; CHECK: bb.2:
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF]], %bb.1, %14, %bb.2
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY7]](s32), implicit $exec
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]](s32), [[COPY7]](s32), implicit $exec
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.buffer.store), [[COPY4]](s32), [[BUILD_VECTOR1]](<4 x s32>), [[COPY5]](s32), [[COPY6]](s32), [[V_READFIRSTLANE_B32_4]](s32), 0 :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
; CHECK: bb.3:
; CHECK: successors: %bb.4(0x80000000)
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
; CHECK: bb.4:
; CHECK: S_ENDPGM 0
call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
ret void
}
declare void @llvm.amdgcn.struct.buffer.store.f32(float, <4 x i32>, i32, i32, i32, i32 immarg)

View File

@ -1,5 +1,9 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
---
name: smulh_s32_ss
@ -8,10 +12,16 @@ legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1
; CHECK-LABEL: name: smulh_s32_ss
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
; GFX6-LABEL: name: smulh_s32_ss
; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; GFX6: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; GFX6: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY2]]
; GFX9-LABEL: name: smulh_s32_ss
; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; GFX9: [[SMULH:%[0-9]+]]:sgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_SMULH %0, %1
@ -24,10 +34,15 @@ legalized: true
body: |
bb.0:
liveins: $sgpr0, $vgpr0
; CHECK-LABEL: name: smulh_s32_sv
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
; GFX6-LABEL: name: smulh_s32_sv
; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; GFX6: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
; GFX9-LABEL: name: smulh_s32_sv
; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; GFX9: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
%2:_(s32) = G_SMULH %0, %1
@ -40,11 +55,17 @@ legalized: true
body: |
bb.0:
liveins: $sgpr0, $vgpr0
; CHECK-LABEL: name: smulh_s32_vs
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY2]]
; GFX6-LABEL: name: smulh_s32_vs
; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; GFX6: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; GFX6: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY2]]
; GFX9-LABEL: name: smulh_s32_vs
; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; GFX9: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY2]]
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $sgpr0
%2:_(s32) = G_SMULH %0, %1
@ -57,10 +78,15 @@ legalized: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1
; CHECK-LABEL: name: smulh_s32_vv
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
; GFX6-LABEL: name: smulh_s32_vv
; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; GFX6: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
; GFX9-LABEL: name: smulh_s32_vv
; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; GFX9: [[SMULH:%[0-9]+]]:vgpr(s32) = G_SMULH [[COPY]], [[COPY1]]
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = G_SMULH %0, %1

View File

@ -1,5 +1,9 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
# XUN: llc -march=amdgcn -mcpu=tahiti -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
# XUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
---
name: umulh_s32_ss
@ -8,10 +12,16 @@ legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1
; CHECK-LABEL: name: umulh_s32_ss
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
; GFX6-LABEL: name: umulh_s32_ss
; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; GFX6: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; GFX6: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY2]]
; GFX9-LABEL: name: umulh_s32_ss
; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; GFX9: [[UMULH:%[0-9]+]]:sgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_UMULH %0, %1
@ -24,10 +34,15 @@ legalized: true
body: |
bb.0:
liveins: $sgpr0, $vgpr0
; CHECK-LABEL: name: umulh_s32_sv
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
; GFX6-LABEL: name: umulh_s32_sv
; GFX6: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; GFX6: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
; GFX9-LABEL: name: umulh_s32_sv
; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; GFX9: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
%2:_(s32) = G_UMULH %0, %1
@ -40,11 +55,17 @@ legalized: true
body: |
bb.0:
liveins: $sgpr0, $vgpr0
; CHECK-LABEL: name: umulh_s32_vs
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY2]]
; GFX6-LABEL: name: umulh_s32_vs
; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; GFX6: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; GFX6: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY2]]
; GFX9-LABEL: name: umulh_s32_vs
; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; GFX9: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY2]]
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $sgpr0
%2:_(s32) = G_UMULH %0, %1
@ -57,10 +78,15 @@ legalized: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1
; CHECK-LABEL: name: umulh_s32_vv
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
; GFX6-LABEL: name: umulh_s32_vv
; GFX6: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; GFX6: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; GFX6: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
; GFX9-LABEL: name: umulh_s32_vv
; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; GFX9: [[UMULH:%[0-9]+]]:vgpr(s32) = G_UMULH [[COPY]], [[COPY1]]
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = G_UMULH %0, %1

View File

@ -0,0 +1,70 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -run-pass=machine-scheduler -verify-misched -o - %s | FileCheck %s
# This would assert that a dead def should have no uses, but the dead
# def and use have different subreg indices.
---
name: multi_def_dead_reg_subreg_check
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr24_sgpr25_sgpr26_sgpr27'
scratchWaveOffsetReg: '$sgpr32'
frameOffsetReg: '$sgpr32'
stackPtrOffsetReg: '$sgpr32'
argumentInfo:
privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
privateSegmentWaveByteOffset: { reg: '$sgpr33' }
body: |
; CHECK-LABEL: name: multi_def_dead_reg_subreg_check
; CHECK: bb.0:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: liveins: $sgpr6_sgpr7
; CHECK: undef %0.sub3:vreg_512 = V_MOV_B32_e32 0, implicit $exec
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 0, [[V_MOV_B32_e32_]], implicit $exec
; CHECK: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK: [[COPY:%[0-9]+]]:vreg_512 = COPY %0
; CHECK: bb.1:
; CHECK: successors: %bb.1(0x80000000)
; CHECK: BUFFER_STORE_DWORD_OFFEN %0.sub3, undef %5:vgpr_32, $sgpr24_sgpr25_sgpr26_sgpr27, $sgpr32, 0, 0, 0, 0, 0, implicit $exec :: (store 4, align 8, addrspace 5)
; CHECK: dead %6:vgpr_32 = DS_READ_B32_gfx9 undef %7:vgpr_32, 0, 0, implicit $exec
; CHECK: dead %8:vreg_64 = DS_READ_B64_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec
; CHECK: dead %9:vreg_128 = DS_READ_B128_gfx9 [[V_ADD_U32_e32_]], 0, 0, implicit $exec
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
; CHECK: undef %11.sub1:vreg_512 = COPY [[COPY]].sub1
; CHECK: INLINEASM &"", 1, 851978, def dead [[COPY1]], 851978, def dead [[COPY]].sub1, 2147483657, [[COPY1]], 2147549193, [[COPY]].sub1
; CHECK: %11.sub0:vreg_512 = COPY [[COPY]].sub0
; CHECK: %11.sub3:vreg_512 = COPY [[COPY]].sub3
; CHECK: dead %10:vgpr_32 = V_ADD_I32_e32 4, [[V_MOV_B32_e32_1]], implicit-def dead $vcc, implicit $exec
; CHECK: %11.sub2:vreg_512 = COPY undef [[V_MOV_B32_e32_]]
; CHECK: %11.sub5:vreg_512 = COPY undef [[V_MOV_B32_e32_]]
; CHECK: [[COPY2:%[0-9]+]]:vreg_512 = COPY %11
; CHECK: S_BRANCH %bb.1
bb.0:
liveins: $sgpr6_sgpr7
undef %0.sub3:vreg_512 = V_MOV_B32_e32 0, implicit $exec
%1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%2:vgpr_32 = V_ADD_U32_e32 0, %1, implicit $exec
%3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%4:vreg_512 = COPY %0
bb.1:
BUFFER_STORE_DWORD_OFFEN %0.sub3, undef %5:vgpr_32, $sgpr24_sgpr25_sgpr26_sgpr27, $sgpr32, 0, 0, 0, 0, 0, implicit $exec :: (store 4, align 8, addrspace 5)
%6:vgpr_32 = DS_READ_B32_gfx9 undef %7:vgpr_32, 0, 0, implicit $exec
%8:vreg_64 = DS_READ_B64_gfx9 %1, 0, 0, implicit $exec
%9:vreg_128 = DS_READ_B128_gfx9 %2, 0, 0, implicit $exec
%10:vgpr_32 = V_ADD_I32_e32 4, %3, implicit-def dead $vcc, implicit $exec
undef %11.sub0:vreg_512 = COPY %4.sub0
%12:vgpr_32 = COPY %4.sub0
%11.sub1:vreg_512 = COPY %4.sub1
INLINEASM &"", 1, 851978, def dead %12, 851978, def dead %4.sub1, 2147483657, %12, 2147549193, %4.sub1
%11.sub2:vreg_512 = COPY undef %1
%11.sub3:vreg_512 = COPY %4.sub3
%11.sub5:vreg_512 = COPY undef %1
%4:vreg_512 = COPY %11
S_BRANCH %bb.1
...

View File

@ -0,0 +1,31 @@
// RUN: llvm-tblgen -gen-global-isel -optimize-match-table=false -I %p/Common -I %p/../../include %s -o - < %s | FileCheck -check-prefix=GISEL %s
include "llvm/Target/Target.td"
include "GlobalISelEmitterCommon.td"
let TargetPrefix = "mytarget" in {
def int_mytarget_sleep0 : Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]>;
def int_mytarget_sleep1 : Intrinsic<[], [llvm_i32_ty], [ImmArg<0>]>;
}
// GISEL: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS,
// GISEL-NEXT: // MIs[0] Operand 0
// GISEL-NEXT: GIM_CheckIntrinsicID, /*MI*/0, /*Op*/0, Intrinsic::mytarget_sleep0,
// GISEL-NEXT: // MIs[0] src
// GISEL-NEXT: GIM_CheckIsImm, /*MI*/0, /*Op*/1,
// GISEL-NEXT: // (intrinsic_void {{[0-9]+}}:{ *:[iPTR] }, (timm:{ *:[i32] }):$src) => (SLEEP0 (timm:{ *:[i32] }):$src)
// GISEL-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/MyTarget::SLEEP0,
// GISEL-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/1, // src
def SLEEP0 : I<(outs), (ins i32imm:$src),
[(int_mytarget_sleep0 timm:$src)]
>;
// Test for situation which was crashing in ARM patterns.
def p_imm : Operand<i32>;
def SLEEP1 : I<(outs), (ins p_imm:$src), []>;
// FIXME: This should not crash, but should it work or be an error?
// def : Pat <
// (int_mytarget_sleep1 timm:$src),
// (SLEEP1 imm:$src)
// >;

View File

@ -1062,6 +1062,7 @@ public:
IPM_Opcode,
IPM_NumOperands,
IPM_ImmPredicate,
IPM_Imm,
IPM_AtomicOrderingMMO,
IPM_MemoryLLTSize,
IPM_MemoryVsLLTSize,
@ -1340,6 +1341,23 @@ public:
}
};
class ImmOperandMatcher : public OperandPredicateMatcher {
public:
ImmOperandMatcher(unsigned InsnVarID, unsigned OpIdx)
: OperandPredicateMatcher(IPM_Imm, InsnVarID, OpIdx) {}
static bool classof(const PredicateMatcher *P) {
return P->getKind() == IPM_Imm;
}
void emitPredicateOpcodes(MatchTable &Table,
RuleMatcher &Rule) const override {
Table << MatchTable::Opcode("GIM_CheckIsImm") << MatchTable::Comment("MI")
<< MatchTable::IntValue(InsnVarID) << MatchTable::Comment("Op")
<< MatchTable::IntValue(OpIdx) << MatchTable::LineBreak;
}
};
/// Generates code to check that an operand is a G_CONSTANT with a particular
/// int.
class ConstantIntOperandMatcher : public OperandPredicateMatcher {
@ -3794,6 +3812,10 @@ Error GlobalISelEmitter::importChildMatcher(RuleMatcher &Rule,
OM.addPredicate<MBBOperandMatcher>();
return Error::success();
}
if (SrcChild->getOperator()->getName() == "timm") {
OM.addPredicate<ImmOperandMatcher>();
return Error::success();
}
}
}
@ -3943,7 +3965,10 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
// rendered as operands.
// FIXME: The target should be able to choose sign-extended when appropriate
// (e.g. on Mips).
if (DstChild->getOperator()->getName() == "imm") {
if (DstChild->getOperator()->getName() == "timm") {
DstMIBuilder.addRenderer<CopyRenderer>(DstChild->getName());
return InsertPt;
} else if (DstChild->getOperator()->getName() == "imm") {
DstMIBuilder.addRenderer<CopyConstantAsImmRenderer>(DstChild->getName());
return InsertPt;
} else if (DstChild->getOperator()->getName() == "fpimm") {