[AMDGPU] Preliminary patch for divergence-driven instruction selection. Inline immediate moves into V_MADAK_F32.

Differential revision: https://reviews.llvm.org/D51586

    Reviewer: rampitec

llvm-svn: 341843
This commit is contained in:
Alexander Timofeev 2018-09-10 16:42:49 +00:00
parent 05a623eb87
commit 20cbe6f319
2 changed files with 218 additions and 5 deletions

View File

@ -2066,12 +2066,40 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
if (Src2->isReg() && Src2->getReg() == Reg) {
// Not allowed to use constant bus for another operand.
// We can however allow an inline immediate as src0.
if (!Src0->isImm() &&
(Src0->isReg() && RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
return false;
bool Src0Inlined = false;
if (Src0->isReg()) {
// Try to inline constant if possible.
// If the Def moves immediate and the use is single
// We are saving VGPR here.
MachineInstr *Def = MRI->getUniqueVRegDef(Src0->getReg());
if (Def && Def->isMoveImmediate() &&
isInlineConstant(Def->getOperand(1)) &&
MRI->hasOneUse(Src0->getReg())) {
Src0->ChangeToImmediate(Def->getOperand(1).getImm());
Src0Inlined = true;
} else if ((RI.isPhysicalRegister(Src0->getReg()) &&
RI.isSGPRClass(RI.getPhysRegClass(Src0->getReg()))) ||
(RI.isVirtualRegister(Src0->getReg()) &&
RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
return false;
// VGPR is okay as Src0 - fallthrough
}
if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
return false;
if (Src1->isReg() && !Src0Inlined ) {
// We have one slot for inlinable constant so far - try to fill it
MachineInstr *Def = MRI->getUniqueVRegDef(Src1->getReg());
if (Def && Def->isMoveImmediate() &&
isInlineConstant(Def->getOperand(1)) &&
MRI->hasOneUse(Src1->getReg()) &&
commuteInstruction(UseMI)) {
Src0->ChangeToImmediate(Def->getOperand(1).getImm());
} else if ((RI.isPhysicalRegister(Src1->getReg()) &&
RI.isSGPRClass(RI.getPhysRegClass(Src1->getReg()))) ||
(RI.isVirtualRegister(Src1->getReg()) &&
RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
return false;
// VGPR is okay as Src1 - fallthrough
}
const int64_t Imm = ImmOp->getImm();

View File

@ -0,0 +1,185 @@
# RUN: llc -march=amdgcn -run-pass peephole-opt -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
# Case "src1-inlined": the V_MAC_F32_e64 below multiplies VGPR %0 by SGPR %18,
# where %18 is an S_MOV_B32 of 1082130432 (0x40800000, the bit pattern of 4.0f),
# and accumulates into %17, a V_MOV_B32 of 1092616192 (0x41200000, 10.0f).
# The checks expect both moves to remain and the V_MAC to become a V_MADAK_F32
# whose first source is the 4.0f immediate, whose second source is %0, and
# whose trailing literal is the 10.0f value.
# NOTE(review): the checks name %3 while the input writes %19 - presumably the
# MIR parser renumbers virtual registers densely; confirm.
# GCN-LABEL: bb.0:
# GCN: V_MOV_B32_e32 1092616192
# GCN: S_MOV_B32 1082130432
# GCN: %3:vgpr_32 = V_MADAK_F32 1082130432, killed %0, 1092616192, implicit $exec
---
name: test src1-inlined
body: |
bb.0:
liveins: $vgpr0, $sgpr0_sgpr1
%0:vgpr_32 = COPY $vgpr0
%17:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec
%18:sreg_32 = S_MOV_B32 1082130432
%19:vgpr_32 = V_MAC_F32_e64 0, killed %0, 0, killed %18, 0, %17, 0, 0, implicit $exec
...
# Case "src0-inlined": same inputs as a V_MAC of %18 * %0 + %17, but here the
# SGPR %18 (S_MOV_B32 of 1082130432 = 0x40800000 = 4.0f) is already the first
# multiplicand and VGPR %0 the second. The checks expect a V_MADAK_F32 with
# the 4.0f immediate as first source, %0 as second source, and 1092616192
# (0x41200000, 10.0f) as the trailing literal.
# GCN-LABEL: bb.0:
# GCN: V_MOV_B32_e32 1092616192
# GCN: S_MOV_B32 1082130432
# GCN: %3:vgpr_32 = V_MADAK_F32 1082130432, killed %0, 1092616192, implicit $exec
---
name: test src0-inlined
body: |
bb.0:
liveins: $vgpr0, $sgpr0_sgpr1
%0:vgpr_32 = COPY $vgpr0
%17:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec
%18:sreg_32 = S_MOV_B32 1082130432
%19:vgpr_32 = V_MAC_F32_e64 0, killed %18, 0, killed %0, 0, %17, 0, 0, implicit $exec
...
# Case "none-inlined": both multiplicands of the V_MAC_F32_e64 are the VGPR %0;
# the S_MOV_B32 result %18 is defined but not used by the V_MAC. The checks
# expect a V_MADAK_F32 that keeps %0 in both register sources (no immediate is
# substituted) and carries 1092616192 (0x41200000, 10.0f) as the literal.
# GCN-LABEL: bb.0:
# GCN: V_MOV_B32_e32 1092616192
# GCN: S_MOV_B32 1082130432
# GCN: %3:vgpr_32 = V_MADAK_F32 killed %0, killed %0, 1092616192, implicit $exec
---
name: test none-inlined
body: |
bb.0:
liveins: $vgpr0, $sgpr0_sgpr1
%0:vgpr_32 = COPY $vgpr0
%17:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec
%18:sreg_32 = S_MOV_B32 1082130432
%19:vgpr_32 = V_MAC_F32_e64 0, killed %0, 0, killed %0, 0, %17, 0, 0, implicit $exec
...
# Case "src1-2vgprs-inlined": like "src1-inlined", except the constant
# 1082130432 (0x40800000, 4.0f) is materialized into a VGPR (%18, V_MOV_B32_e32)
# rather than an SGPR, so both multiplicands of the V_MAC are VGPRs. The checks
# expect the constant to become the first source of the V_MADAK_F32, with %0
# as the second source and 1092616192 (0x41200000, 10.0f) as the literal.
# GCN-LABEL: bb.0:
# GCN: V_MOV_B32_e32 1092616192
# GCN: V_MOV_B32_e32 1082130432
# GCN: %3:vgpr_32 = V_MADAK_F32 1082130432, killed %0, 1092616192, implicit $exec
---
name: test src1-2vgprs-inlined
body: |
bb.0:
liveins: $vgpr0, $sgpr0_sgpr1
%0:vgpr_32 = COPY $vgpr0
%17:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec
%18:vgpr_32 = V_MOV_B32_e32 1082130432, implicit $exec
%19:vgpr_32 = V_MAC_F32_e64 0, killed %0, 0, killed %18, 0, %17, 0, 0, implicit $exec
...
# Case "src0-2vgprs-inlined": both multiplicands are VGPRs and the one holding
# the constant (%18, V_MOV_B32_e32 of 1082130432 = 0x40800000 = 4.0f) is
# already first. The checks expect the V_MADAK_F32 to use that constant as an
# immediate first source, %0 as second source, and 1092616192 (0x41200000,
# 10.0f) as the literal.
# GCN-LABEL: bb.0:
# GCN: V_MOV_B32_e32 1092616192
# GCN: V_MOV_B32_e32 1082130432
# GCN: %3:vgpr_32 = V_MADAK_F32 1082130432, killed %0, 1092616192, implicit $exec
---
name: test src0-2vgprs-inlined
body: |
bb.0:
liveins: $vgpr0, $sgpr0_sgpr1
%0:vgpr_32 = COPY $vgpr0
%17:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec
%18:vgpr_32 = V_MOV_B32_e32 1082130432, implicit $exec
%19:vgpr_32 = V_MAC_F32_e64 0, killed %18, 0, killed %0, 0, %17, 0, 0, implicit $exec
...
# Case "src0-phys-vgpr": the first multiplicand is the physical register $vgpr1
# and the second is SGPR %18 (S_MOV_B32 of 1082130432 = 0x40800000 = 4.0f).
# The checks expect a V_MADAK_F32 with the 4.0f immediate as first source,
# $vgpr1 as second source, and 1092616192 (0x41200000, 10.0f) as the literal.
# GCN-LABEL: bb.0:
# GCN: V_MOV_B32_e32 1092616192, implicit $exec
# GCN: S_MOV_B32 1082130432
# GCN: V_MADAK_F32 1082130432, killed $vgpr1, 1092616192, implicit $exec
---
name: test src0-phys-vgpr
body: |
bb.0:
liveins: $vgpr0, $sgpr0_sgpr1
$vgpr1 = COPY $vgpr0
%17:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec
%18:sgpr_32 = S_MOV_B32 1082130432
%19:vgpr_32 = V_MAC_F32_e64 0, killed $vgpr1, 0, killed %18, 0, %17, 0, 0, implicit $exec
...
# Case "src1-phys-vgpr": the first multiplicand is SGPR %18 (S_MOV_B32 of
# 1082130432 = 0x40800000 = 4.0f) and the second is the physical register
# $vgpr0; the COPY into %0 is not used by the V_MAC. The checks expect a
# V_MADAK_F32 with the 4.0f immediate as first source, $vgpr0 as second source,
# and 1092616192 (0x41200000, 10.0f) as the literal.
# GCN-LABEL: bb.0:
# GCN: V_MOV_B32_e32 1092616192, implicit $exec
# GCN: S_MOV_B32 1082130432
# GCN: V_MADAK_F32 1082130432, killed $vgpr0, 1092616192, implicit $exec
---
name: test src1-phys-vgpr
body: |
bb.0:
liveins: $vgpr0, $sgpr0_sgpr1
%0:vgpr_32 = COPY $vgpr0
%17:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec
%18:sgpr_32 = S_MOV_B32 1082130432
%19:vgpr_32 = V_MAC_F32_e64 0, killed %18, 0, killed %0, 0, %17, 0, 0, implicit $exec
...
# Case "src0-phys-sgpr" (negative test): the first multiplicand is the live-in
# physical register $sgpr2, which has no defining move in this function. The
# checks expect the V_MAC_F32_e64 to survive with the same operands, i.e. no
# V_MADAK_F32 is formed.
# NOTE(review): the checks reference %1 where the input has %17 - presumably
# due to virtual register renumbering on parse; confirm.
# GCN-LABEL: bb.0:
# GCN: V_MOV_B32_e32 1092616192, implicit $exec
# GCN: V_MAC_F32_e64 0, killed $sgpr2, 0, killed %0, 0, %1, 0, 0, implicit $exec
---
name: test src0-phys-sgpr
body: |
bb.0:
liveins: $vgpr0, $sgpr0_sgpr1, $sgpr2
%0:vgpr_32 = COPY $vgpr0
%17:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec
%19:vgpr_32 = V_MAC_F32_e64 0, killed $sgpr2, 0, killed %0, 0, %17, 0, 0, implicit $exec
...
# Case "src1-phys-sgpr" (negative test): the second multiplicand is the live-in
# physical register $sgpr2, which has no defining move in this function. The
# checks expect the V_MAC_F32_e64 to survive with the same operands, i.e. no
# V_MADAK_F32 is formed.
# NOTE(review): the checks reference %1 where the input has %17 - presumably
# due to virtual register renumbering on parse; confirm.
# GCN-LABEL: bb.0:
# GCN: V_MOV_B32_e32 1092616192, implicit $exec
# GCN: V_MAC_F32_e64 0, killed %0, 0, killed $sgpr2, 0, %1, 0, 0, implicit $exec
---
name: test src1-phys-sgpr
body: |
bb.0:
liveins: $vgpr0, $sgpr0_sgpr1, $sgpr2
%0:vgpr_32 = COPY $vgpr0
%17:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec
%19:vgpr_32 = V_MAC_F32_e64 0, killed %0, 0, killed $sgpr2, 0, %17, 0, 0, implicit $exec
...
# Case "src1-phys-sgpr-move": the second multiplicand is the physical register
# $sgpr2, but unlike "src1-phys-sgpr" it is written in this block by an
# S_MOV_B32 of 1082130432 (0x40800000, 4.0f). The checks expect that move to
# remain and the V_MAC to become a V_MADAK_F32 with the 4.0f immediate as first
# source, %0 as second source, and 1092616192 (0x41200000, 10.0f) as the
# literal.
# GCN-LABEL: bb.0:
# GCN: V_MOV_B32_e32 1092616192, implicit $exec
# GCN: $sgpr2 = S_MOV_B32 1082130432
# GCN: V_MADAK_F32 1082130432, killed %0, 1092616192, implicit $exec
---
name: test src1-phys-sgpr-move
body: |
bb.0:
liveins: $vgpr0, $sgpr0_sgpr1
%0:vgpr_32 = COPY $vgpr0
%17:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec
$sgpr2 = S_MOV_B32 1082130432
%19:vgpr_32 = V_MAC_F32_e64 0, killed %0, 0, killed $sgpr2, 0, %17, 0, 0, implicit $exec
...