AMDGPU/GlobalISel: Insert waterfall loop for vector indexing
The register index can only really be an SGPR. Lie that a VGPR index is legal, and then rewrite the instruction in a waterfall loop to handle the index. llvm-svn: 357235
This commit is contained in:
parent
1445b77e8c
commit
5fddf09187
|
@ -13,9 +13,11 @@
|
|||
|
||||
#include "AMDGPURegisterBankInfo.h"
|
||||
#include "AMDGPUInstrInfo.h"
|
||||
#include "AMDGPUSubtarget.h"
|
||||
#include "SIMachineFunctionInfo.h"
|
||||
#include "SIRegisterInfo.h"
|
||||
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
||||
#include "llvm/ADT/SmallSet.h"
|
||||
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
|
||||
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
|
||||
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
|
||||
|
@ -328,6 +330,170 @@ static LLT getHalfSizedType(LLT Ty) {
|
|||
return LLT::scalar(Ty.getSizeInBits() / 2);
|
||||
}
|
||||
|
||||
/// Legalize instruction \p MI where operands in \p OpIndices must be SGPRs. If
|
||||
/// any of the required SGPR operands are VGPRs, perform a waterfall loop to
|
||||
/// execute the instruction for each unique combination of values in all lanes
|
||||
/// in the wave. The block will be split such that new blocks
|
||||
void AMDGPURegisterBankInfo::executeInWaterfallLoop(
|
||||
MachineInstr &MI, MachineRegisterInfo &MRI,
|
||||
ArrayRef<unsigned> OpIndices) const {
|
||||
MachineFunction *MF = MI.getParent()->getParent();
|
||||
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
|
||||
const SIInstrInfo *TII = ST.getInstrInfo();
|
||||
MachineBasicBlock::iterator I(MI);
|
||||
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
const DebugLoc &DL = MI.getDebugLoc();
|
||||
|
||||
assert(OpIndices.size() == 1 &&
|
||||
"need to implement support for multiple operands");
|
||||
|
||||
// Use a set to avoid extra readfirstlanes in the case where multiple operands
|
||||
// are the same register.
|
||||
SmallSet<unsigned, 4> SGPROperandRegs;
|
||||
for (unsigned Op : OpIndices) {
|
||||
assert(MI.getOperand(Op).isUse());
|
||||
unsigned Reg = MI.getOperand(Op).getReg();
|
||||
const RegisterBank *OpBank = getRegBank(Reg, MRI, *TRI);
|
||||
if (OpBank->getID() == AMDGPU::VGPRRegBankID)
|
||||
SGPROperandRegs.insert(Reg);
|
||||
}
|
||||
|
||||
// No operands need to be replaced, so no need to loop.
|
||||
if (SGPROperandRegs.empty())
|
||||
return;
|
||||
|
||||
MachineIRBuilder B(MI);
|
||||
SmallVector<unsigned, 4> ResultRegs;
|
||||
SmallVector<unsigned, 4> InitResultRegs;
|
||||
SmallVector<unsigned, 4> PhiRegs;
|
||||
for (MachineOperand &Def : MI.defs()) {
|
||||
LLT ResTy = MRI.getType(Def.getReg());
|
||||
const RegisterBank *DefBank = getRegBank(Def.getReg(), MRI, *TRI);
|
||||
ResultRegs.push_back(Def.getReg());
|
||||
unsigned InitReg = B.buildUndef(ResTy).getReg(0);
|
||||
unsigned PhiReg = MRI.createGenericVirtualRegister(ResTy);
|
||||
InitResultRegs.push_back(InitReg);
|
||||
PhiRegs.push_back(PhiReg);
|
||||
MRI.setRegBank(PhiReg, *DefBank);
|
||||
MRI.setRegBank(InitReg, *DefBank);
|
||||
}
|
||||
|
||||
unsigned SaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
|
||||
unsigned InitSaveExecReg = MRI.createVirtualRegister(&AMDGPU::SReg_64_XEXECRegClass);
|
||||
|
||||
// Don't bother using generic instructions/registers for the exec mask.
|
||||
B.buildInstr(TargetOpcode::IMPLICIT_DEF)
|
||||
.addDef(InitSaveExecReg);
|
||||
|
||||
// Save the EXEC mask
|
||||
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B64_term), SaveExecReg)
|
||||
.addReg(AMDGPU::EXEC);
|
||||
|
||||
unsigned PhiExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
|
||||
unsigned NewExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
|
||||
unsigned CondReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
|
||||
|
||||
// To insert the loop we need to split the block. Move everything before this
|
||||
// point to a new block, and insert a new empty block before this instruction.
|
||||
MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock();
|
||||
MachineBasicBlock *RemainderBB = MF->CreateMachineBasicBlock();
|
||||
MachineBasicBlock *RestoreExecBB = MF->CreateMachineBasicBlock();
|
||||
MachineFunction::iterator MBBI(MBB);
|
||||
++MBBI;
|
||||
MF->insert(MBBI, LoopBB);
|
||||
MF->insert(MBBI, RestoreExecBB);
|
||||
MF->insert(MBBI, RemainderBB);
|
||||
|
||||
LoopBB->addSuccessor(RestoreExecBB);
|
||||
LoopBB->addSuccessor(LoopBB);
|
||||
|
||||
// Move the rest of the block into a new block.
|
||||
RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
|
||||
RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
|
||||
|
||||
MBB.addSuccessor(LoopBB);
|
||||
RestoreExecBB->addSuccessor(RemainderBB);
|
||||
|
||||
B.setInsertPt(*LoopBB, LoopBB->end());
|
||||
|
||||
B.buildInstr(TargetOpcode::PHI)
|
||||
.addDef(PhiExec)
|
||||
.addReg(InitSaveExecReg)
|
||||
.addMBB(&MBB)
|
||||
.addReg(NewExec)
|
||||
.addMBB(LoopBB);
|
||||
|
||||
for (auto Result : zip(InitResultRegs, ResultRegs, PhiRegs)) {
|
||||
B.buildInstr(TargetOpcode::G_PHI)
|
||||
.addDef(std::get<2>(Result))
|
||||
.addReg(std::get<0>(Result)) // Initial value / implicit_def
|
||||
.addMBB(&MBB)
|
||||
.addReg(std::get<1>(Result)) // Mid-loop value.
|
||||
.addMBB(LoopBB);
|
||||
}
|
||||
|
||||
// Move the instruction into the loop.
|
||||
LoopBB->splice(LoopBB->end(), &MBB, I);
|
||||
I = std::prev(LoopBB->end());
|
||||
|
||||
for (MachineOperand &Op : MI.uses()) {
|
||||
if (!Op.isReg())
|
||||
continue;
|
||||
|
||||
assert(!Op.isDef());
|
||||
if (SGPROperandRegs.count(Op.getReg())) {
|
||||
unsigned CurrentLaneOpReg
|
||||
= MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
|
||||
MRI.setType(CurrentLaneOpReg, LLT::scalar(32)); // FIXME
|
||||
|
||||
assert(MRI.getType(Op.getReg())== LLT::scalar(32) &&
|
||||
"need to implement support for other types");
|
||||
|
||||
constrainGenericRegister(Op.getReg(), AMDGPU::VGPR_32RegClass, MRI);
|
||||
|
||||
// Read the next variant <- also loop target.
|
||||
BuildMI(*LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
|
||||
CurrentLaneOpReg)
|
||||
.addReg(Op.getReg());
|
||||
|
||||
// FIXME: Need to and each conditon
|
||||
|
||||
// Compare the just read SGPR value to all possible operand values.
|
||||
B.buildInstr(AMDGPU::V_CMP_EQ_U32_e64)
|
||||
.addDef(CondReg)
|
||||
.addReg(CurrentLaneOpReg)
|
||||
.addReg(Op.getReg());
|
||||
Op.setReg(CurrentLaneOpReg);
|
||||
}
|
||||
}
|
||||
|
||||
// Update EXEC, save the original EXEC value to VCC.
|
||||
B.buildInstr(AMDGPU::S_AND_SAVEEXEC_B64)
|
||||
.addDef(NewExec)
|
||||
.addReg(CondReg, RegState::Kill);
|
||||
|
||||
MRI.setSimpleHint(NewExec, CondReg);
|
||||
|
||||
// Update EXEC, switch all done bits to 0 and all todo bits to 1.
|
||||
B.buildInstr(AMDGPU::S_XOR_B64_term)
|
||||
.addDef(AMDGPU::EXEC)
|
||||
.addReg(AMDGPU::EXEC)
|
||||
.addReg(NewExec);
|
||||
|
||||
// XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use
|
||||
// s_cbranch_scc0?
|
||||
|
||||
// Loop back to V_READFIRSTLANE_B32 if there are still variants to cover.
|
||||
B.buildInstr(AMDGPU::S_CBRANCH_EXECNZ)
|
||||
.addMBB(LoopBB);
|
||||
|
||||
// Restore the EXEC mask
|
||||
B.buildInstr(AMDGPU::S_MOV_B64_term)
|
||||
.addDef(AMDGPU::EXEC)
|
||||
.addReg(SaveExecReg);
|
||||
}
|
||||
|
||||
void AMDGPURegisterBankInfo::applyMappingImpl(
|
||||
const OperandsMapper &OpdMapper) const {
|
||||
MachineInstr &MI = OpdMapper.getMI();
|
||||
|
@ -436,6 +602,10 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
|
|||
MI.eraseFromParent();
|
||||
return;
|
||||
}
|
||||
case AMDGPU::G_EXTRACT_VECTOR_ELT:
|
||||
applyDefaultMapping(OpdMapper);
|
||||
executeInWaterfallLoop(MI, MRI, { 2 });
|
||||
return;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -37,6 +37,10 @@ protected:
|
|||
class AMDGPURegisterBankInfo : public AMDGPUGenRegisterBankInfo {
|
||||
const SIRegisterInfo *TRI;
|
||||
|
||||
void executeInWaterfallLoop(MachineInstr &MI,
|
||||
MachineRegisterInfo &MRI,
|
||||
ArrayRef<unsigned> OpIndices) const;
|
||||
|
||||
/// See RegisterBankInfo::applyMapping.
|
||||
void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
|
||||
|
||||
|
|
|
@ -3,13 +3,15 @@
|
|||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
|
||||
|
||||
---
|
||||
name: extract_vector_elt_v16i32_ss
|
||||
name: extract_vector_elt_v16s32_ss
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16
|
||||
; CHECK-LABEL: name: extract_vector_elt_v16i32_ss
|
||||
; CHECK-LABEL: name: extract_vector_elt_v16s32_ss
|
||||
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr16
|
||||
; CHECK: [[EVEC:%[0-9]+]]:sgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32)
|
||||
|
@ -21,17 +23,36 @@ body: |
|
|||
...
|
||||
|
||||
---
|
||||
name: extract_vector_elt_v16i32_sv
|
||||
name: extract_vector_elt_v16s32_sv
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0
|
||||
; CHECK-LABEL: name: extract_vector_elt_v16i32_sv
|
||||
; CHECK-LABEL: name: extract_vector_elt_v16s32_sv
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $vgpr0
|
||||
; CHECK: [[COPY:%[0-9]+]]:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr0
|
||||
; CHECK: [[COPY2:%[0-9]+]]:vgpr(<16 x s32>) = COPY [[COPY]](<16 x s32>)
|
||||
; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY2]](<16 x s32>), [[COPY1]](s32)
|
||||
; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
|
||||
; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||
; CHECK: .1:
|
||||
; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
|
||||
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %9, %bb.1
|
||||
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %2(s32), %bb.1
|
||||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
|
||||
; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY2]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||
; CHECK: .2:
|
||||
; CHECK: successors: %bb.3(0x80000000)
|
||||
; CHECK: .3:
|
||||
; CHECK: $vgpr0 = COPY [[EVEC]](s32)
|
||||
%0:_(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
|
||||
%1:_(s32) = COPY $vgpr0
|
||||
|
@ -40,13 +61,15 @@ body: |
|
|||
...
|
||||
|
||||
---
|
||||
name: extract_vector_elt_v16i32_vs
|
||||
name: extract_vector_elt_v16s32_vs
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr0
|
||||
; CHECK-LABEL: name: extract_vector_elt_v16i32_vs
|
||||
; CHECK-LABEL: name: extract_vector_elt_v16s32_vs
|
||||
; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $sgpr0
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
||||
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
|
||||
; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32)
|
||||
|
@ -58,19 +81,75 @@ body: |
|
|||
...
|
||||
|
||||
---
|
||||
name: extract_vector_elt_v16i32_vv
|
||||
name: extract_vector_elt_v16s32_vv
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16
|
||||
; CHECK-LABEL: name: extract_vector_elt_v16i32_vv
|
||||
; CHECK-LABEL: name: extract_vector_elt_v16s32_vv
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr16
|
||||
; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[COPY1]](s32)
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr16
|
||||
; CHECK: [[DEF:%[0-9]+]]:vgpr(s32) = G_IMPLICIT_DEF
|
||||
; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||
; CHECK: .1:
|
||||
; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
|
||||
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %8, %bb.1
|
||||
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s32) = G_PHI [[DEF]](s32), %bb.0, %2(s32), %bb.1
|
||||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
|
||||
; CHECK: [[EVEC:%[0-9]+]]:vgpr(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[V_READFIRSTLANE_B32_]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||
; CHECK: .2:
|
||||
; CHECK: successors: %bb.3(0x80000000)
|
||||
; CHECK: .3:
|
||||
; CHECK: $vgpr0 = COPY [[EVEC]](s32)
|
||||
%0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
||||
%1:_(s32) = COPY $vgpr16
|
||||
%2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1
|
||||
$vgpr0 = COPY %2
|
||||
...
|
||||
|
||||
---
|
||||
name: extract_vector_elt_v8s64_vv
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16
|
||||
; CHECK-LABEL: name: extract_vector_elt_v8s64_vv
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16
|
||||
; CHECK: [[COPY:%[0-9]+]]:vgpr(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
||||
; CHECK: [[COPY1:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr16
|
||||
; CHECK: [[DEF:%[0-9]+]]:vgpr(s64) = G_IMPLICIT_DEF
|
||||
; CHECK: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
|
||||
; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
|
||||
; CHECK: .1:
|
||||
; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
|
||||
; CHECK: [[PHI:%[0-9]+]]:sreg_64 = PHI [[DEF1]], %bb.0, %8, %bb.1
|
||||
; CHECK: [[PHI1:%[0-9]+]]:vgpr(s64) = G_PHI [[DEF]](s64), %bb.0, %2(s64), %bb.1
|
||||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[COPY1]](s32), implicit $exec
|
||||
; CHECK: [[EVEC:%[0-9]+]]:vgpr(s64) = G_EXTRACT_VECTOR_ELT [[COPY]](<8 x s64>), [[V_READFIRSTLANE_B32_]](s32)
|
||||
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]](s32), [[COPY1]](s32), implicit $exec
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||
; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
|
||||
; CHECK: .2:
|
||||
; CHECK: successors: %bb.3(0x80000000)
|
||||
; CHECK: .3:
|
||||
; CHECK: $vgpr0_vgpr1 = COPY [[EVEC]](s64)
|
||||
%0:_(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
|
||||
%1:_(s32) = COPY $vgpr16
|
||||
%2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1
|
||||
$vgpr0_vgpr1 = COPY %2
|
||||
...
|
||||
|
|
Loading…
Reference in New Issue