[AArch64][Falkor] Refine sched details for LSLfast/ASRfast.

llvm-svn: 303682
This commit is contained in:
Geoff Berry 2017-05-23 19:57:45 +00:00
parent 53a21292f8
commit d6ac96f953
4 changed files with 189 additions and 40 deletions

View File

@ -763,15 +763,126 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
llvm_unreachable("Unknown opcode to check as cheap as a move!");
}
bool AArch64InstrInfo::isFalkorLSLFast(const MachineInstr &MI) const {
if (MI.getNumOperands() < 4)
bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
default:
return false;
unsigned ShOpVal = MI.getOperand(3).getImm();
unsigned ShImm = AArch64_AM::getShiftValue(ShOpVal);
if (AArch64_AM::getShiftType(ShOpVal) == AArch64_AM::LSL &&
ShImm < 4)
return true;
return false;
case AArch64::ADDWrs:
case AArch64::ADDXrs:
case AArch64::ADDSWrs:
case AArch64::ADDSXrs: {
unsigned Imm = MI.getOperand(3).getImm();
unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
if (ShiftVal == 0)
return true;
return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
}
case AArch64::ADDWrx:
case AArch64::ADDXrx:
case AArch64::ADDXrx64:
case AArch64::ADDSWrx:
case AArch64::ADDSXrx:
case AArch64::ADDSXrx64: {
unsigned Imm = MI.getOperand(3).getImm();
switch (AArch64_AM::getArithExtendType(Imm)) {
default:
return false;
case AArch64_AM::UXTB:
case AArch64_AM::UXTH:
case AArch64_AM::UXTW:
case AArch64_AM::UXTX:
return AArch64_AM::getArithShiftValue(Imm) <= 4;
}
}
case AArch64::SUBWrs:
case AArch64::SUBSWrs: {
unsigned Imm = MI.getOperand(3).getImm();
unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
return ShiftVal == 0 ||
(AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
}
case AArch64::SUBXrs:
case AArch64::SUBSXrs: {
unsigned Imm = MI.getOperand(3).getImm();
unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
return ShiftVal == 0 ||
(AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
}
case AArch64::SUBWrx:
case AArch64::SUBXrx:
case AArch64::SUBXrx64:
case AArch64::SUBSWrx:
case AArch64::SUBSXrx:
case AArch64::SUBSXrx64: {
unsigned Imm = MI.getOperand(3).getImm();
switch (AArch64_AM::getArithExtendType(Imm)) {
default:
return false;
case AArch64_AM::UXTB:
case AArch64_AM::UXTH:
case AArch64_AM::UXTW:
case AArch64_AM::UXTX:
return AArch64_AM::getArithShiftValue(Imm) == 0;
}
}
case AArch64::LDRBBroW:
case AArch64::LDRBBroX:
case AArch64::LDRBroW:
case AArch64::LDRBroX:
case AArch64::LDRDroW:
case AArch64::LDRDroX:
case AArch64::LDRHHroW:
case AArch64::LDRHHroX:
case AArch64::LDRHroW:
case AArch64::LDRHroX:
case AArch64::LDRQroW:
case AArch64::LDRQroX:
case AArch64::LDRSBWroW:
case AArch64::LDRSBWroX:
case AArch64::LDRSBXroW:
case AArch64::LDRSBXroX:
case AArch64::LDRSHWroW:
case AArch64::LDRSHWroX:
case AArch64::LDRSHXroW:
case AArch64::LDRSHXroX:
case AArch64::LDRSWroW:
case AArch64::LDRSWroX:
case AArch64::LDRSroW:
case AArch64::LDRSroX:
case AArch64::LDRWroW:
case AArch64::LDRWroX:
case AArch64::LDRXroW:
case AArch64::LDRXroX:
case AArch64::PRFMroW:
case AArch64::PRFMroX:
case AArch64::STRBBroW:
case AArch64::STRBBroX:
case AArch64::STRBroW:
case AArch64::STRBroX:
case AArch64::STRDroW:
case AArch64::STRDroX:
case AArch64::STRHHroW:
case AArch64::STRHHroX:
case AArch64::STRHroW:
case AArch64::STRHroX:
case AArch64::STRQroW:
case AArch64::STRQroX:
case AArch64::STRSroW:
case AArch64::STRSroX:
case AArch64::STRWroW:
case AArch64::STRWroX:
case AArch64::STRXroW:
case AArch64::STRXroX: {
unsigned IsSigned = MI.getOperand(3).getImm();
return !IsSigned;
}
}
}
bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,

View File

@ -270,7 +270,7 @@ public:
bool IsTailCall) const override;
/// Returns true if the instruction has a shift by immediate that can be
/// executed in one cycle less.
bool isFalkorLSLFast(const MachineInstr &MI) const;
bool isFalkorShiftExtFast(const MachineInstr &MI) const;
private:
/// \brief Sets the offsets on outlined instructions in \p MBB which use SP

View File

@ -266,14 +266,20 @@ def : InstRW<[FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc, WriteAdr],(instregex "^LD4
// Arithmetic and Logical Instructions
// -----------------------------------------------------------------------------
def : InstRW<[FalkorWr_ADD], (instregex "^ADD(S)?(W|X)r(s|x)$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(CCMN|CCMP)(W|X)(r|i)$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ADC(S)?(W|X)r$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ADD(S)?(W|X)r(r|i)$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(CSEL|CSINC|CSINV|CSNEG)(W|X)r$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^AND(S)?(W|X)r(i|r|s)$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^BIC(S)?(W|X)r(r|s)$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EON(W|X)r(r|s)$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EOR(W|X)r(i|r|s)$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORN(W|X)r(r|s)$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORR(W|X)r(i|r|s)$")>;
def : InstRW<[FalkorWr_2XYZ_2cyc], (instregex "^SUB(S)?(W|X)r(s|x)$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^SBC(S)?(W|X)r$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^SUB(S)?(W|X)r(r|i)$")>;
def : InstRW<[FalkorWr_ADDSUBsx], (instregex "^ADD(S)?(W|X)r(s|x|x64)$")>;
def : InstRW<[FalkorWr_ADDSUBsx], (instregex "^SUB(S)?(W|X)r(s|x|x64)$")>;
// SIMD Miscellaneous Instructions
// -----------------------------------------------------------------------------
@ -328,9 +334,14 @@ def : InstRW<[FalkorWr_5VXVY_7cyc], (instregex "^TBX(v8i8Four|v16i8Four)$")>;
// SIMD Store Instructions
// -----------------------------------------------------------------------------
def : InstRW<[WriteVST], (instregex "^STP(D|S)(i)$")>;
def : InstRW<[WriteVST, WriteAdr], (instregex "^STP(D|S)(post|pre)$")>;
def : InstRW<[FalkorWr_2XYZ_2ST_2VSD_0cyc], (instregex "^STRQro(W|X)$")>;
def : InstRW<[WriteVST], (instregex "^STR(Q|D|S|H|B)ui$")>;
def : InstRW<[WriteVST, WriteVST], (instregex "^STPQi$")>;
def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^STPQ(post|pre)$")>;
def : InstRW<[WriteVST], (instregex "^STP(D|S)(i)$")>;
def : InstRW<[WriteVST, WriteAdr], (instregex "^STP(D|S)(post|pre)$")>;
def : InstRW<[FalkorWr_STRVro], (instregex "^STR(D|S|H|B)ro(W|X)$")>;
def : InstRW<[FalkorWr_STRQro], (instregex "^STRQro(W|X)$")>;
def : InstRW<[WriteVST], (instregex "^ST1(One(v8b|v4h|v2s|v1d)(_POST)?|(i8|i16|i32|i64)(_POST)?|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>;
def : InstRW<[WriteVST], (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))$")>;
@ -391,7 +402,7 @@ def : InstRW<[FalkorWr_4VXVY_3cyc], (instrs SHA256SU1rrr)>;
def : InstRW<[WriteLD], (instregex "^LDR((Q|D|S|H|B)ui|(Q|D|S)l)$")>;
def : InstRW<[WriteLD, WriteAdr], (instregex "^LDR(Q|D|S|H|B)(post|pre)$")>;
def : InstRW<[WriteLD], (instregex "^LDUR(Q|D|S|H|B)i$")>;
def : InstRW<[FalkorWr_LDR], (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>;
def : InstRW<[FalkorWr_LDRro], (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>;
def : InstRW<[FalkorWr_2LD_3cyc, WriteLDHi],(instrs LDNPQi)>;
def : InstRW<[FalkorWr_2LD_3cyc, WriteLDHi],(instrs LDPQi)>;
def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteLDHi],(instregex "LDNP(D|S)i$")>;
@ -461,10 +472,10 @@ def : InstRW<[FalkorWr_1LD_4cyc], (instrs LDRSWl)>;
def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>;
def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDURS(BW|BX|HW|HX|W)i$")>;
def : InstRW<[FalkorWr_PRFM], (instregex "^PRFMro(W|X)$")>;
def : InstRW<[FalkorWr_LDR], (instregex "^LDR(B|H|W|X)ro(W|X)$")>;
def : InstRW<[FalkorWr_PRFMro], (instregex "^PRFMro(W|X)$")>;
def : InstRW<[FalkorWr_LDRro], (instregex "^LDR(B|H|W|X)ro(W|X)$")>;
def : InstRW<[FalkorWr_LDRS], (instregex "^LDRS(BW|BX|HW|HX|W)ro(W|X)$")>;
def : InstRW<[FalkorWr_LDRSro], (instregex "^LDRS(BW|BX|HW|HX|W)ro(W|X)$")>;
def : InstRW<[FalkorWr_1LD_4cyc, WriteAdr],(instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>;
def : InstRW<[WriteLD, WriteLDHi, WriteAdr],(instregex "^LDP(W|X)(post|pre)$")>;
@ -529,14 +540,9 @@ def : InstRW<[WriteVST, WriteVST], (instrs STNPQi)>;
// -----------------------------------------------------------------------------
def : InstRW<[WriteST], (instregex "^STP(W|X)i$")>;
def : InstRW<[WriteST, WriteAdr], (instregex "^STP(W|X)(post|pre)$")>;
def : InstRW<[WriteST], (instregex "^STR(Q|D|S|BB|HH)ui$")>;
def : InstRW<[WriteST], (instregex "^STR(BB|HH|W|X)ui$")>;
def : InstRW<[WriteST, WriteAdr], (instregex "^STR(BB|HH|W|X)(post|pre)$")>;
def : InstRW<[WriteST], (instregex "^STUR(Q|D|S|BB|HH)i$")>;
def : InstRW<[WriteST], (instregex "^STR(B|H|W|X)ui$")>;
def : InstRW<[WriteST, WriteAdr], (instregex "^STR(B|H|W|X)(post|pre)$")>;
def : InstRW<[WriteST], (instregex "^STTR(B|H|W|X)i$")>;
def : InstRW<[WriteST], (instregex "^STUR(B|H|W|X)i$")>;
def : InstRW<[WriteST, WriteAdr], (instregex "^STR(B|H|W|X)ro(W|X)$")>;
def : InstRW<[WriteVST, WriteVST], (instregex "^STPQi$")>;
def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^STPQ(post|pre)$")>;
def : InstRW<[FalkorWr_STRro], (instregex "^STR(B|H|W|X)ro(W|X)$")>;

View File

@ -186,6 +186,11 @@ def FalkorWr_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitSD, FalkorUnitST]> {
let NumMicroOps = 2;
}
def FalkorWr_1VSD_1ST_0cyc: SchedWriteRes<[FalkorUnitVSD, FalkorUnitST]> {
let Latency = 0;
let NumMicroOps = 2;
}
//===----------------------------------------------------------------------===//
// Define 3 micro-op types
@ -243,6 +248,14 @@ def FalkorWr_2LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
let NumMicroOps = 3;
}
def FalkorWr_1XYZ_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitSD, FalkorUnitST]> {
let Latency = 0;
let NumMicroOps = 3;
}
def FalkorWr_1XYZ_1VSD_1ST_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitVSD, FalkorUnitST]> {
let Latency = 0;
let NumMicroOps = 3;
}
//===----------------------------------------------------------------------===//
// Define 4 micro-op types
@ -316,6 +329,12 @@ def FalkorWr_5VXVY_7cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
let Latency = 7;
let NumMicroOps = 5;
}
def FalkorWr_1XYZ_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST,
FalkorUnitVSD, FalkorUnitST,
FalkorUnitVSD]> {
let Latency = 0;
let NumMicroOps = 5;
}
//===----------------------------------------------------------------------===//
// Define 6 micro-op types
@ -373,12 +392,12 @@ def FalkorReadVMA : SchedReadAdvance<3, [FalkorWr_VMUL32_1VXVY_4cyc, FalkorWr
def FalkorReadFMA32 : SchedReadAdvance<1, [FalkorWr_FMUL32_1VXVY_5cyc, FalkorWr_FMUL32_2VXVY_5cyc]>;
def FalkorReadFMA64 : SchedReadAdvance<2, [FalkorWr_FMUL64_1VXVY_6cyc, FalkorWr_FMUL64_2VXVY_6cyc]>;
// SchedPredicates and WriteVariants for Immediate Zero and LSLFast
// SchedPredicates and WriteVariants for Immediate Zero and LSLFast/ASRFast
// -----------------------------------------------------------------------------
def FalkorImmZPred : SchedPredicate<[{MI->getOperand(1).getImm() == 0}]>;
def FalkorFMOVZrReg : SchedPredicate<[{MI->getOperand(1).getReg() == AArch64::WZR ||
MI->getOperand(1).getReg() == AArch64::XZR}]>;
def FalkorLSLFastPred : SchedPredicate<[{TII->isFalkorLSLFast(*MI)}]>;
def FalkorShiftExtFastPred : SchedPredicate<[{TII->isFalkorShiftExtFast(*MI)}]>;
def FalkorWr_FMOV : SchedWriteVariant<[
SchedVar<FalkorFMOVZrReg, [FalkorWr_1none_0cyc]>,
@ -388,18 +407,31 @@ def FalkorWr_MOVZ : SchedWriteVariant<[
SchedVar<FalkorImmZPred, [FalkorWr_1none_0cyc]>,
SchedVar<NoSchedPred, [FalkorWr_1XYZB_1cyc]>]>;
def FalkorWr_LDR : SchedWriteVariant<[
SchedVar<FalkorLSLFastPred, [FalkorWr_1LD_3cyc]>,
SchedVar<NoSchedPred, [FalkorWr_1XYZ_1LD_4cyc]>]>;
def FalkorWr_ADDSUBsx : SchedWriteVariant<[
SchedVar<FalkorShiftExtFastPred, [FalkorWr_1XYZ_1cyc]>,
SchedVar<NoSchedPred, [FalkorWr_2XYZ_2cyc]>]>;
def FalkorWr_ADD : SchedWriteVariant<[
SchedVar<FalkorLSLFastPred, [FalkorWr_1XYZ_1cyc]>,
SchedVar<NoSchedPred, [FalkorWr_2XYZ_2cyc]>]>;
def FalkorWr_LDRro : SchedWriteVariant<[
SchedVar<FalkorShiftExtFastPred, [FalkorWr_1LD_3cyc]>,
SchedVar<NoSchedPred, [FalkorWr_1XYZ_1LD_4cyc]>]>;
def FalkorWr_PRFM : SchedWriteVariant<[
SchedVar<FalkorLSLFastPred, [FalkorWr_1ST_3cyc]>,
SchedVar<NoSchedPred, [FalkorWr_1XYZ_1ST_4cyc]>]>;
def FalkorWr_LDRSro : SchedWriteVariant<[
SchedVar<FalkorShiftExtFastPred, [FalkorWr_1LD_4cyc]>,
SchedVar<NoSchedPred, [FalkorWr_1XYZ_1LD_5cyc]>]>;
def FalkorWr_PRFMro : SchedWriteVariant<[
SchedVar<FalkorShiftExtFastPred, [FalkorWr_1ST_3cyc]>,
SchedVar<NoSchedPred, [FalkorWr_1XYZ_1ST_4cyc]>]>;
def FalkorWr_STRVro : SchedWriteVariant<[
SchedVar<FalkorShiftExtFastPred, [FalkorWr_1VSD_1ST_0cyc]>,
SchedVar<NoSchedPred, [FalkorWr_1XYZ_1VSD_1ST_0cyc]>]>;
def FalkorWr_STRQro : SchedWriteVariant<[
SchedVar<FalkorShiftExtFastPred, [FalkorWr_1XYZ_2ST_2VSD_0cyc]>,
SchedVar<NoSchedPred, [FalkorWr_2XYZ_2ST_2VSD_0cyc]>]>;
def FalkorWr_STRro : SchedWriteVariant<[
SchedVar<FalkorShiftExtFastPred, [FalkorWr_1SD_1ST_0cyc]>,
SchedVar<NoSchedPred, [FalkorWr_1XYZ_1SD_1ST_0cyc]>]>;
def FalkorWr_LDRS : SchedWriteVariant<[
SchedVar<FalkorLSLFastPred, [FalkorWr_1LD_4cyc]>,
SchedVar<NoSchedPred, [FalkorWr_1XYZ_1LD_5cyc]>]>;