Enhance the InstrStage object to enable the specification of an Itinerary with overlapping stages. The default is to maintain the current behavior that the "next" stage immediately follows the previous one.

llvm-svn: 78827
This commit is contained in:
David Goodwin 2009-08-12 18:31:53 +00:00
parent 887cd6a8eb
commit b369ee4c48
8 changed files with 168 additions and 116 deletions

View File

@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//
// This file describes the structures used for instruction itineraries and
// states. This is used by schedulers to determine instruction states and
// stages. This is used by schedulers to determine instruction stages and
// latencies.
//
//===----------------------------------------------------------------------===//
@ -16,17 +16,57 @@
#ifndef LLVM_TARGET_TARGETINSTRITINERARIES_H
#define LLVM_TARGET_TARGETINSTRITINERARIES_H
#include <algorithm>
namespace llvm {
//===----------------------------------------------------------------------===//
/// Instruction stage - These values represent a step in the execution of an
/// instruction. The latency represents the number of discrete time slots
/// needed to complete the stage. Units represent the choice of functional
/// units that can be used to complete the stage. Eg. IntUnit1, IntUnit2.
/// Instruction stage - These values represent a non-pipelined step in
/// the execution of an instruction. Cycles represents the number of
/// discrete time slots needed to complete the stage. Units represent
/// the choice of functional units that can be used to complete the
/// stage. Eg. IntUnit1, IntUnit2. NextCycles indicates how many
/// cycles should elapse from the start of this stage to the start of
/// the next stage in the itinerary. A value of -1 indicates that the
/// next stage should start immediately after the current one.
/// For example:
///
/// { 1, x, -1 }
/// indicates that the stage occupies FU x for 1 cycle and that
/// the next stage starts immediately after this one.
///
/// { 2, x|y, 1 }
/// indicates that the stage occupies either FU x or FU y for 2
/// consecutive cycles and that the next stage starts one cycle
/// after this stage starts. That is, the stage requirements
/// overlap in time.
///
/// { 1, x, 0 }
/// indicates that the stage occupies FU x for 1 cycle and that
/// the next stage starts in this same cycle. This can be used to
/// indicate that the instruction requires multiple stages at the
/// same time.
///
struct InstrStage {
unsigned Cycles; ///< Length of stage in machine cycles
unsigned Units; ///< Choice of functional units
unsigned Cycles_; ///< Length of stage in machine cycles
unsigned Units_; ///< Choice of functional units (callers test it as a bitmask)
int NextCycles_; ///< Cycles from the start of this stage to the start of
                 ///< the next stage; a negative value means "use Cycles_"
/// getCycles - returns the number of cycles the stage is occupied
unsigned getCycles() const {
return Cycles_;
}
/// getUnits - returns the choice of FUs that can execute this stage
unsigned getUnits() const {
return Units_;
}
/// getNextCycles - returns the number of cycles from the start of
/// this stage to the start of the next stage in the itinerary.
/// When NextCycles_ is negative the stage length (Cycles_) is
/// returned, i.e. the next stage begins right after this one ends.
unsigned getNextCycles() const {
// Negative NextCycles_ is the "no explicit increment" marker.
return (NextCycles_ >= 0) ? (unsigned)NextCycles_ : Cycles_;
}
};
@ -84,13 +124,17 @@ struct InstrItineraryData {
if (isEmpty())
return 1;
// Just sum the cycle count for each stage. The assumption is that all
// inputs are consumed at the start of the first stage and that all
// outputs are produced at the end of the last stage.
unsigned Latency = 0;
// Calculate the maximum completion time over all stages. The
// assumption is that all inputs are consumed at the start of the
// first stage and that all outputs are produced at the end of the
// latest-completing stage.
unsigned Latency = 0, StartCycle = 0;
for (const InstrStage *IS = begin(ItinClassIndx), *E = end(ItinClassIndx);
IS != E; ++IS)
Latency += IS->Cycles;
IS != E; ++IS) {
Latency = std::max(Latency, StartCycle + IS->getCycles());
StartCycle += IS->getNextCycles();
}
return Latency;
}
};

View File

@ -23,14 +23,23 @@
class FuncUnit;
//===----------------------------------------------------------------------===//
// Instruction stage - These values represent a step in the execution of an
// instruction. The latency represents the number of discrete time slots used
// need to complete the stage. Units represent the choice of functional units
// that can be used to complete the stage. Eg. IntUnit1, IntUnit2.
// Instruction stage - These values represent a non-pipelined step in
// the execution of an instruction. Cycles represents the number of
// discrete time slots needed to complete the stage. Units represent
// the choice of functional units that can be used to complete the
// stage. Eg. IntUnit1, IntUnit2. TimeInc indicates how many
// cycles should elapse from the start of this stage to the start of
// the next stage in the itinerary.
//
class InstrStage<int cycles, list<FuncUnit> units> {
// A stage is specified in one of two ways:
//
// InstrStage<1, [FU_x, FU_y]> - TimeInc defaults to Cycles
// InstrStage<1, [FU_x, FU_y], 0> - TimeInc explicit
//
// timeinc is optional; when omitted it is -1, which means the next
// stage starts Cycles after this one (no overlap).
class InstrStage<int cycles, list<FuncUnit> units, int timeinc = -1> {
int Cycles = cycles; // length of stage in machine cycles
list<FuncUnit> Units = units; // choice of functional units
int TimeInc = timeinc; // cycles from start of this stage to start of next stage
}
//===----------------------------------------------------------------------===//

View File

@ -34,12 +34,12 @@ ExactHazardRecognizer::ExactHazardRecognizer(const InstrItineraryData &LItinData
// If the begin stage of an itinerary has 0 cycles and units,
// then we have reached the end of the itineraries.
const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx);
if ((IS->Cycles == 0) && (IS->Units == 0))
if ((IS->getCycles() == 0) && (IS->getUnits() == 0))
break;
unsigned ItinDepth = 0;
for (; IS != E; ++IS)
ItinDepth += std::max(1U, IS->Cycles);
ItinDepth += IS->getCycles();
ScoreboardDepth = std::max(ScoreboardDepth, ItinDepth);
}
@ -89,27 +89,25 @@ ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU
unsigned idx = SU->getInstr()->getDesc().getSchedClass();
for (const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx);
IS != E; ++IS) {
// If the stages cycles are 0, then we must have the FU free in
// the current cycle, but we don't advance the cycle time .
unsigned StageCycles = std::max(1U, IS->Cycles);
// We must find one of the stage's units free for every cycle the
// stage is occupied.
for (unsigned int i = 0; i < StageCycles; ++i) {
assert((cycle < ScoreboardDepth) && "Scoreboard depth exceeded!");
// stage is occupied. FIXME it would be more accurate to find the
// same unit free in all the cycles.
for (unsigned int i = 0; i < IS->getCycles(); ++i) {
assert(((cycle + i) < ScoreboardDepth) &&
"Scoreboard depth exceeded!");
unsigned index = getFutureIndex(cycle);
unsigned freeUnits = IS->Units & ~Scoreboard[index];
unsigned index = getFutureIndex(cycle + i);
unsigned freeUnits = IS->getUnits() & ~Scoreboard[index];
if (!freeUnits) {
DEBUG(errs() << "*** Hazard in cycle " << cycle << ", ");
DEBUG(errs() << "*** Hazard in cycle " << (cycle + i) << ", ");
DEBUG(errs() << "SU(" << SU->NodeNum << "): ");
DEBUG(SU->getInstr()->dump());
return Hazard;
}
if (IS->Cycles > 0)
++cycle;
}
// Advance the cycle to the next stage.
cycle += IS->getNextCycles();
}
return NoHazard;
@ -123,17 +121,15 @@ void ExactHazardRecognizer::EmitInstruction(SUnit *SU) {
unsigned idx = SU->getInstr()->getDesc().getSchedClass();
for (const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx);
IS != E; ++IS) {
// If the stages cycles are 0, then we must reserve the FU in the
// current cycle, but we don't advance the cycle time .
unsigned StageCycles = std::max(1U, IS->Cycles);
// We must reserve one of the stage's units for every cycle the
// stage is occupied.
for (unsigned int i = 0; i < StageCycles; ++i) {
assert((cycle < ScoreboardDepth) && "Scoreboard depth exceeded!");
// stage is occupied. FIXME it would be more accurate to reserve
// the same unit in all the cycles.
for (unsigned int i = 0; i < IS->getCycles(); ++i) {
assert(((cycle + i) < ScoreboardDepth) &&
"Scoreboard depth exceeded!");
unsigned index = getFutureIndex(cycle);
unsigned freeUnits = IS->Units & ~Scoreboard[index];
unsigned index = getFutureIndex(cycle + i);
unsigned freeUnits = IS->getUnits() & ~Scoreboard[index];
// reduce to a single unit
unsigned freeUnit = 0;
@ -144,10 +140,10 @@ void ExactHazardRecognizer::EmitInstruction(SUnit *SU) {
assert(freeUnit && "No function unit available!");
Scoreboard[index] |= freeUnit;
if (IS->Cycles > 0)
++cycle;
}
// Advance the cycle to the next stage.
cycle += IS->getNextCycles();
}
DEBUG(dumpScoreboard());

View File

@ -615,7 +615,7 @@ let isReturn = 1, isTerminator = 1, mayLoad = 1 in
[]>;
// On non-Darwin platforms R9 is callee-saved.
let isCall = 1, Itinerary = IIC_Br,
let isCall = 1,
Defs = [R0, R1, R2, R3, R12, LR,
D0, D1, D2, D3, D4, D5, D6, D7,
D16, D17, D18, D19, D20, D21, D22, D23,
@ -652,7 +652,7 @@ let isCall = 1, Itinerary = IIC_Br,
}
// On Darwin R9 is call-clobbered.
let isCall = 1, Itinerary = IIC_Br,
let isCall = 1,
Defs = [R0, R1, R2, R3, R9, R12, LR,
D0, D1, D2, D3, D4, D5, D6, D7,
D16, D17, D18, D19, D20, D21, D22, D23,
@ -685,7 +685,7 @@ let isCall = 1, Itinerary = IIC_Br,
}
}
let isBranch = 1, isTerminator = 1, Itinerary = IIC_Br in {
let isBranch = 1, isTerminator = 1 in {
// B is "predicable" since it can be xformed into a Bcc.
let isBarrier = 1 in {
let isPredicable = 1 in
@ -1057,7 +1057,7 @@ defm BIC : AsI1_bin_irs<0b1110, "bic",
BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
AddrMode1, Size4Bytes, IndexModeNone, DPFrm, NoItinerary,
AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iALU,
"bfc", " $dst, $imm", "$src = $dst",
[(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]>,
Requires<[IsARM, HasV6T2]> {
@ -1084,16 +1084,16 @@ def : ARMPat<(and GPR:$src, so_imm_not:$imm),
//
let isCommutable = 1 in
def MUL : AsMul1I<0b0000000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
def MUL : AsMul1I<0b0000000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
"mul", " $dst, $a, $b",
[(set GPR:$dst, (mul GPR:$a, GPR:$b))]>;
def MLA : AsMul1I<0b0000001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
IIC_iALU, "mla", " $dst, $a, $b, $c",
IIC_iMPY, "mla", " $dst, $a, $b, $c",
[(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>;
def MLS : AMul1I<0b0000011, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
IIC_iALU, "mls", " $dst, $a, $b, $c",
IIC_iMPY, "mls", " $dst, $a, $b, $c",
[(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>,
Requires<[IsARM, HasV6T2]>;
@ -1101,32 +1101,32 @@ def MLS : AMul1I<0b0000011, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
let neverHasSideEffects = 1 in {
let isCommutable = 1 in {
def SMULL : AsMul1I<0b0000110, (outs GPR:$ldst, GPR:$hdst),
(ins GPR:$a, GPR:$b), IIC_iALU,
(ins GPR:$a, GPR:$b), IIC_iMPY,
"smull", " $ldst, $hdst, $a, $b", []>;
def UMULL : AsMul1I<0b0000100, (outs GPR:$ldst, GPR:$hdst),
(ins GPR:$a, GPR:$b), IIC_iALU,
(ins GPR:$a, GPR:$b), IIC_iMPY,
"umull", " $ldst, $hdst, $a, $b", []>;
}
// Multiply + accumulate
def SMLAL : AsMul1I<0b0000111, (outs GPR:$ldst, GPR:$hdst),
(ins GPR:$a, GPR:$b), IIC_iALU,
(ins GPR:$a, GPR:$b), IIC_iMPY,
"smlal", " $ldst, $hdst, $a, $b", []>;
def UMLAL : AsMul1I<0b0000101, (outs GPR:$ldst, GPR:$hdst),
(ins GPR:$a, GPR:$b), IIC_iALU,
(ins GPR:$a, GPR:$b), IIC_iMPY,
"umlal", " $ldst, $hdst, $a, $b", []>;
def UMAAL : AMul1I <0b0000010, (outs GPR:$ldst, GPR:$hdst),
(ins GPR:$a, GPR:$b), IIC_iALU,
(ins GPR:$a, GPR:$b), IIC_iMPY,
"umaal", " $ldst, $hdst, $a, $b", []>,
Requires<[IsARM, HasV6]>;
} // neverHasSideEffects
// Most significant word multiply
def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
IIC_iALU, "smmul", " $dst, $a, $b",
IIC_iMPY, "smmul", " $dst, $a, $b",
[(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>,
Requires<[IsARM, HasV6]> {
let Inst{7-4} = 0b0001;
@ -1134,7 +1134,7 @@ def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
}
def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
IIC_iALU, "smmla", " $dst, $a, $b, $c",
IIC_iMPY, "smmla", " $dst, $a, $b, $c",
[(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>,
Requires<[IsARM, HasV6]> {
let Inst{7-4} = 0b0001;
@ -1142,7 +1142,7 @@ def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
IIC_iALU, "smmls", " $dst, $a, $b, $c",
IIC_iMPY, "smmls", " $dst, $a, $b, $c",
[(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>,
Requires<[IsARM, HasV6]> {
let Inst{7-4} = 0b1101;
@ -1150,7 +1150,7 @@ def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
multiclass AI_smul<string opc, PatFrag opnode> {
def BB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
IIC_iALU, !strconcat(opc, "bb"), " $dst, $a, $b",
IIC_iMPY, !strconcat(opc, "bb"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
(sext_inreg GPR:$b, i16)))]>,
Requires<[IsARM, HasV5TE]> {
@ -1159,7 +1159,7 @@ multiclass AI_smul<string opc, PatFrag opnode> {
}
def BT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
IIC_iALU, !strconcat(opc, "bt"), " $dst, $a, $b",
IIC_iMPY, !strconcat(opc, "bt"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
(sra GPR:$b, (i32 16))))]>,
Requires<[IsARM, HasV5TE]> {
@ -1168,7 +1168,7 @@ multiclass AI_smul<string opc, PatFrag opnode> {
}
def TB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
IIC_iALU, !strconcat(opc, "tb"), " $dst, $a, $b",
IIC_iMPY, !strconcat(opc, "tb"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
(sext_inreg GPR:$b, i16)))]>,
Requires<[IsARM, HasV5TE]> {
@ -1177,7 +1177,7 @@ multiclass AI_smul<string opc, PatFrag opnode> {
}
def TT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
IIC_iALU, !strconcat(opc, "tt"), " $dst, $a, $b",
IIC_iMPY, !strconcat(opc, "tt"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
(sra GPR:$b, (i32 16))))]>,
Requires<[IsARM, HasV5TE]> {
@ -1186,7 +1186,7 @@ multiclass AI_smul<string opc, PatFrag opnode> {
}
def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
IIC_iALU, !strconcat(opc, "wb"), " $dst, $a, $b",
IIC_iMPY, !strconcat(opc, "wb"), " $dst, $a, $b",
[(set GPR:$dst, (sra (opnode GPR:$a,
(sext_inreg GPR:$b, i16)), (i32 16)))]>,
Requires<[IsARM, HasV5TE]> {
@ -1195,7 +1195,7 @@ multiclass AI_smul<string opc, PatFrag opnode> {
}
def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
IIC_iALU, !strconcat(opc, "wt"), " $dst, $a, $b",
IIC_iMPY, !strconcat(opc, "wt"), " $dst, $a, $b",
[(set GPR:$dst, (sra (opnode GPR:$a,
(sra GPR:$b, (i32 16))), (i32 16)))]>,
Requires<[IsARM, HasV5TE]> {
@ -1207,7 +1207,7 @@ multiclass AI_smul<string opc, PatFrag opnode> {
multiclass AI_smla<string opc, PatFrag opnode> {
def BB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
IIC_iALU, !strconcat(opc, "bb"), " $dst, $a, $b, $acc",
IIC_iMPY, !strconcat(opc, "bb"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc,
(opnode (sext_inreg GPR:$a, i16),
(sext_inreg GPR:$b, i16))))]>,
@ -1217,7 +1217,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
}
def BT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
IIC_iALU, !strconcat(opc, "bt"), " $dst, $a, $b, $acc",
IIC_iMPY, !strconcat(opc, "bt"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16),
(sra GPR:$b, (i32 16)))))]>,
Requires<[IsARM, HasV5TE]> {
@ -1226,7 +1226,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
}
def TB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
IIC_iALU, !strconcat(opc, "tb"), " $dst, $a, $b, $acc",
IIC_iMPY, !strconcat(opc, "tb"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
(sext_inreg GPR:$b, i16))))]>,
Requires<[IsARM, HasV5TE]> {
@ -1235,7 +1235,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
}
def TT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
IIC_iALU, !strconcat(opc, "tt"), " $dst, $a, $b, $acc",
IIC_iMPY, !strconcat(opc, "tt"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
(sra GPR:$b, (i32 16)))))]>,
Requires<[IsARM, HasV5TE]> {
@ -1244,7 +1244,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
}
def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
IIC_iALU, !strconcat(opc, "wb"), " $dst, $a, $b, $acc",
IIC_iMPY, !strconcat(opc, "wb"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
(sext_inreg GPR:$b, i16)), (i32 16))))]>,
Requires<[IsARM, HasV5TE]> {
@ -1253,7 +1253,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
}
def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
IIC_iALU, !strconcat(opc, "wt"), " $dst, $a, $b, $acc",
IIC_iMPY, !strconcat(opc, "wt"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
(sra GPR:$b, (i32 16))), (i32 16))))]>,
Requires<[IsARM, HasV5TE]> {

View File

@ -508,7 +508,7 @@ def tMOVgpr2gpr : T1I<(outs GPR:$dst), (ins GPR:$src), IIC_iALU,
// multiply register
let isCommutable = 1 in
def tMUL : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALU,
def tMUL : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMPY,
"mul", " $dst, $rhs",
[(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>;

View File

@ -808,80 +808,80 @@ def : T2Pat<(t2_so_imm_not:$src),
// Multiply Instructions.
//
let isCommutable = 1 in
def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
"mul", " $dst, $a, $b",
[(set GPR:$dst, (mul GPR:$a, GPR:$b))]>;
def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iALU,
def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMPY,
"mla", " $dst, $a, $b, $c",
[(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>;
def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iALU,
def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMPY,
"mls", " $dst, $a, $b, $c",
[(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>;
// Extra precision multiplies with low / high results
let neverHasSideEffects = 1 in {
let isCommutable = 1 in {
def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU,
def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY,
"smull", " $ldst, $hdst, $a, $b", []>;
def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU,
def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY,
"umull", " $ldst, $hdst, $a, $b", []>;
}
// Multiply + accumulate
def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU,
def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY,
"smlal", " $ldst, $hdst, $a, $b", []>;
def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU,
def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY,
"umlal", " $ldst, $hdst, $a, $b", []>;
def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU,
def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY,
"umaal", " $ldst, $hdst, $a, $b", []>;
} // neverHasSideEffects
// Most significant word multiply
def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
"smmul", " $dst, $a, $b",
[(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>;
def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iALU,
def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMPY,
"smmla", " $dst, $a, $b, $c",
[(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>;
def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iALU,
def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMPY,
"smmls", " $dst, $a, $b, $c",
[(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>;
multiclass T2I_smul<string opc, PatFrag opnode> {
def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
!strconcat(opc, "bb"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
(sext_inreg GPR:$b, i16)))]>;
def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
!strconcat(opc, "bt"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
(sra GPR:$b, (i32 16))))]>;
def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
!strconcat(opc, "tb"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
(sext_inreg GPR:$b, i16)))]>;
def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
!strconcat(opc, "tt"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
(sra GPR:$b, (i32 16))))]>;
def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
!strconcat(opc, "wb"), " $dst, $a, $b",
[(set GPR:$dst, (sra (opnode GPR:$a,
(sext_inreg GPR:$b, i16)), (i32 16)))]>;
def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU,
def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
!strconcat(opc, "wt"), " $dst, $a, $b",
[(set GPR:$dst, (sra (opnode GPR:$a,
(sra GPR:$b, (i32 16))), (i32 16)))]>;
@ -889,33 +889,33 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
multiclass T2I_smla<string opc, PatFrag opnode> {
def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU,
def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY,
!strconcat(opc, "bb"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc,
(opnode (sext_inreg GPR:$a, i16),
(sext_inreg GPR:$b, i16))))]>;
def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU,
def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY,
!strconcat(opc, "bt"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16),
(sra GPR:$b, (i32 16)))))]>;
def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU,
def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY,
!strconcat(opc, "tb"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
(sext_inreg GPR:$b, i16))))]>;
def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU,
def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY,
!strconcat(opc, "tt"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
(sra GPR:$b, (i32 16)))))]>;
def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU,
def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY,
!strconcat(opc, "wb"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
(sext_inreg GPR:$b, i16)), (i32 16))))]>;
def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU,
def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY,
!strconcat(opc, "wt"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
(sra GPR:$b, (i32 16))), (i32 16))))]>;

View File

@ -16,34 +16,34 @@ def CortexA8Itineraries : ProcessorItineraries<[
// two fully-pipelined integer ALU pipelines
InstrItinData<IIC_iALU , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
// one fully-pipelined integer Multiply pipeline
// function units are used in alpha order, so use FU_Pipe1
// for the Multiple pipeline
InstrItinData<IIC_iMPY , [InstrStage<1, [FU_Pipe1]>]>,
// function units are reserved by the scheduler in reverse alpha order,
// so use FU_Pipe0 for the Multiply pipeline
InstrItinData<IIC_iMPY , [InstrStage<1, [FU_Pipe0]>]>,
// loads have an extra cycle of latency, but are fully pipelined
// use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_iLoad , [InstrStage<0, [FU_Issue]>,
// use FU_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0]>]>,
// fully-pipelined stores
// use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_iStore , [InstrStage<0, [FU_Issue]>,
// use FU_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_iStore , [InstrStage<1, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
// no delay slots, so the latency of a branch is unimportant
InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
// VFP ALU is not pipelined so stall all issues
// FIXME assume NFP pipeline and 7 cycle non-pipelined latency
InstrItinData<IIC_fpALU , [InstrStage<7, [FU_Pipe0, FU_Pipe1]>]>,
// NFP ALU is not pipelined so stall all issues
InstrItinData<IIC_fpALU , [InstrStage<7, [FU_Pipe0], 0>,
InstrStage<7, [FU_Pipe1], 0>]>,
// VFP MPY is not pipelined so stall all issues
// FIXME assume NFP pipeline and 7 cycle non-pipelined latency
InstrItinData<IIC_fpMPY , [InstrStage<7, [FU_Pipe0, FU_Pipe1]>]>,
InstrItinData<IIC_fpMPY , [InstrStage<7, [FU_Pipe0], 0>,
InstrStage<7, [FU_Pipe1], 0>]>,
// loads have an extra cycle of latency, but are fully pipelined
// use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpLoad , [InstrStage<0, [FU_Issue]>,
// use FU_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0]>]>,
// use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpStore , [InstrStage<0, [FU_Issue]>,
// use FU_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>
]>;

View File

@ -215,7 +215,7 @@ void SubtargetEmitter::FormItineraryString(Record *ItinData,
// Next stage
const Record *Stage = StageList[i];
// Form string as ,{ cycles, u1 | u2 | ... | un }
// Form string as ,{ cycles, u1 | u2 | ... | un, timeinc }
int Cycles = Stage->getValueAsInt("Cycles");
ItinString += " { " + itostr(Cycles) + ", ";
@ -229,6 +229,9 @@ void SubtargetEmitter::FormItineraryString(Record *ItinData,
if (++j < M) ItinString += " | ";
}
int TimeInc = Stage->getValueAsInt("TimeInc");
ItinString += ", " + itostr(TimeInc);
// Close off stage
ItinString += " }";
if (++i < N) ItinString += ", ";
@ -252,7 +255,7 @@ void SubtargetEmitter::EmitStageData(raw_ostream &OS,
// Begin stages table
OS << "static const llvm::InstrStage Stages[] = {\n"
" { 0, 0 }, // No itinerary\n";
" { 0, 0, 0 }, // No itinerary\n";
unsigned StageCount = 1;
unsigned ItinEnum = 1;
@ -289,7 +292,7 @@ void SubtargetEmitter::EmitStageData(raw_ostream &OS,
// If new itinerary
if (Find == 0) {
// Emit as { cycles, u1 | u2 | ... | un }, // index
// Emit as { cycles, u1 | u2 | ... | un, timeinc }, // index
OS << ItinString << ", // " << ItinEnum << "\n";
// Record Itin class number.
ItinMap[ItinString] = Find = StageCount;
@ -313,7 +316,7 @@ void SubtargetEmitter::EmitStageData(raw_ostream &OS,
}
// Closing stage
OS << " { 0, 0 } // End itinerary\n";
OS << " { 0, 0, 0 } // End itinerary\n";
// End stages table
OS << "};\n";