Enhance the InstrStage object to enable the specification of an Itinerary with overlapping stages. The default is to maintain the current behavior that the "next" stage immediately follows the previous one.

llvm-svn: 78827
This commit is contained in:
David Goodwin 2009-08-12 18:31:53 +00:00
parent 887cd6a8eb
commit b369ee4c48
8 changed files with 168 additions and 116 deletions

View File

@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// //
// This file describes the structures used for instruction itineraries and // This file describes the structures used for instruction itineraries and
// states. This is used by schedulers to determine instruction states and // stages. This is used by schedulers to determine instruction stages and
// latencies. // latencies.
// //
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
@ -16,17 +16,57 @@
#ifndef LLVM_TARGET_TARGETINSTRITINERARIES_H #ifndef LLVM_TARGET_TARGETINSTRITINERARIES_H
#define LLVM_TARGET_TARGETINSTRITINERARIES_H #define LLVM_TARGET_TARGETINSTRITINERARIES_H
#include <algorithm>
namespace llvm { namespace llvm {
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
/// Instruction stage - These values represent a step in the execution of an /// Instruction stage - These values represent a non-pipelined step in
/// instruction. The latency represents the number of discrete time slots /// the execution of an instruction. Cycles represents the number of
/// needed to complete the stage. Units represent the choice of functional /// discrete time slots needed to complete the stage. Units represent
/// units that can be used to complete the stage. Eg. IntUnit1, IntUnit2. /// the choice of functional units that can be used to complete the
/// stage. Eg. IntUnit1, IntUnit2. NextCycles indicates how many
/// cycles should elapse from the start of this stage to the start of
/// the next stage in the itinerary. A value of -1 indicates that the
/// next stage should start immediately after the current one.
/// For example:
///
/// { 1, x, -1 }
/// indicates that the stage occupies FU x for 1 cycle and that
/// the next stage starts immediately after this one.
///
/// { 2, x|y, 1 }
/// indicates that the stage occupies either FU x or FU y for 2
/// consecutive cycles and that the next stage starts one cycle
/// after this stage starts. That is, the stage requirements
/// overlap in time.
///
/// { 1, x, 0 }
/// indicates that the stage occupies FU x for 1 cycle and that
/// the next stage starts in this same cycle. This can be used to
/// indicate that the instruction requires multiple stages at the
/// same time.
/// ///
struct InstrStage { struct InstrStage {
unsigned Cycles; ///< Length of stage in machine cycles unsigned Cycles_; ///< Length of stage in machine cycles
unsigned Units; ///< Choice of functional units unsigned Units_; ///< Choice of functional units
int NextCycles_; ///< Number of machine cycles to next stage
/// getCycles - returns the number of cycles the stage is occupied
unsigned getCycles() const {
return Cycles_;
}
/// getUnits - returns the choice of FUs
unsigned getUnits() const {
return Units_;
}
/// getNextCycles - returns the number of cycles from the start of
/// this stage to the start of the next stage in the itinerary
unsigned getNextCycles() const {
return (NextCycles_ >= 0) ? (unsigned)NextCycles_ : Cycles_;
}
}; };
@ -84,13 +124,17 @@ struct InstrItineraryData {
if (isEmpty()) if (isEmpty())
return 1; return 1;
// Just sum the cycle count for each stage. The assumption is that all // Calculate the maximum completion time for any stage. The
// inputs are consumed at the start of the first stage and that all // assumption is that all inputs are consumed at the start of the
// outputs are produced at the end of the last stage. // first stage and that all outputs are produced at the end of the
unsigned Latency = 0; // latest-completing stage.
unsigned Latency = 0, StartCycle = 0;
for (const InstrStage *IS = begin(ItinClassIndx), *E = end(ItinClassIndx); for (const InstrStage *IS = begin(ItinClassIndx), *E = end(ItinClassIndx);
IS != E; ++IS) IS != E; ++IS) {
Latency += IS->Cycles; Latency = std::max(Latency, StartCycle + IS->getCycles());
StartCycle += IS->getNextCycles();
}
return Latency; return Latency;
} }
}; };

View File

@ -23,14 +23,23 @@
class FuncUnit; class FuncUnit;
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// Instruction stage - These values represent a step in the execution of an // Instruction stage - These values represent a non-pipelined step in
// instruction. The latency represents the number of discrete time slots used // the execution of an instruction. Cycles represents the number of
// need to complete the stage. Units represent the choice of functional units // discrete time slots needed to complete the stage. Units represent
// that can be used to complete the stage. Eg. IntUnit1, IntUnit2. // the choice of functional units that can be used to complete the
// stage. Eg. IntUnit1, IntUnit2. TimeInc indicates how many
// cycles should elapse from the start of this stage to the start of
// the next stage in the itinerary.
// //
class InstrStage<int cycles, list<FuncUnit> units> { // A stage is specified in one of two ways:
//
// InstrStage<1, [FU_x, FU_y]> - TimeInc defaults to Cycles
// InstrStage<1, [FU_x, FU_y], 0> - TimeInc explicit
//
class InstrStage<int cycles, list<FuncUnit> units, int timeinc = -1> {
int Cycles = cycles; // length of stage in machine cycles int Cycles = cycles; // length of stage in machine cycles
list<FuncUnit> Units = units; // choice of functional units list<FuncUnit> Units = units; // choice of functional units
int TimeInc = timeinc; // cycles till start of next stage
} }
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//

View File

@ -34,12 +34,12 @@ ExactHazardRecognizer::ExactHazardRecognizer(const InstrItineraryData &LItinData
// If the begin stage of an itinerary has 0 cycles and units, // If the begin stage of an itinerary has 0 cycles and units,
// then we have reached the end of the itineraries. // then we have reached the end of the itineraries.
const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx); const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx);
if ((IS->Cycles == 0) && (IS->Units == 0)) if ((IS->getCycles() == 0) && (IS->getUnits() == 0))
break; break;
unsigned ItinDepth = 0; unsigned ItinDepth = 0;
for (; IS != E; ++IS) for (; IS != E; ++IS)
ItinDepth += std::max(1U, IS->Cycles); ItinDepth += IS->getCycles();
ScoreboardDepth = std::max(ScoreboardDepth, ItinDepth); ScoreboardDepth = std::max(ScoreboardDepth, ItinDepth);
} }
@ -89,27 +89,25 @@ ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU
unsigned idx = SU->getInstr()->getDesc().getSchedClass(); unsigned idx = SU->getInstr()->getDesc().getSchedClass();
for (const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx); for (const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx);
IS != E; ++IS) { IS != E; ++IS) {
// If the stages cycles are 0, then we must have the FU free in
// the current cycle, but we don't advance the cycle time .
unsigned StageCycles = std::max(1U, IS->Cycles);
// We must find one of the stage's units free for every cycle the // We must find one of the stage's units free for every cycle the
// stage is occupied. // stage is occupied. FIXME it would be more accurate to find the
for (unsigned int i = 0; i < StageCycles; ++i) { // same unit free in all the cycles.
assert((cycle < ScoreboardDepth) && "Scoreboard depth exceeded!"); for (unsigned int i = 0; i < IS->getCycles(); ++i) {
assert(((cycle + i) < ScoreboardDepth) &&
"Scoreboard depth exceeded!");
unsigned index = getFutureIndex(cycle); unsigned index = getFutureIndex(cycle + i);
unsigned freeUnits = IS->Units & ~Scoreboard[index]; unsigned freeUnits = IS->getUnits() & ~Scoreboard[index];
if (!freeUnits) { if (!freeUnits) {
DEBUG(errs() << "*** Hazard in cycle " << cycle << ", "); DEBUG(errs() << "*** Hazard in cycle " << (cycle + i) << ", ");
DEBUG(errs() << "SU(" << SU->NodeNum << "): "); DEBUG(errs() << "SU(" << SU->NodeNum << "): ");
DEBUG(SU->getInstr()->dump()); DEBUG(SU->getInstr()->dump());
return Hazard; return Hazard;
} }
if (IS->Cycles > 0)
++cycle;
} }
// Advance the cycle to the next stage.
cycle += IS->getNextCycles();
} }
return NoHazard; return NoHazard;
@ -123,17 +121,15 @@ void ExactHazardRecognizer::EmitInstruction(SUnit *SU) {
unsigned idx = SU->getInstr()->getDesc().getSchedClass(); unsigned idx = SU->getInstr()->getDesc().getSchedClass();
for (const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx); for (const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx);
IS != E; ++IS) { IS != E; ++IS) {
// If the stages cycles are 0, then we must reserve the FU in the
// current cycle, but we don't advance the cycle time .
unsigned StageCycles = std::max(1U, IS->Cycles);
// We must reserve one of the stage's units for every cycle the // We must reserve one of the stage's units for every cycle the
// stage is occupied. // stage is occupied. FIXME it would be more accurate to reserve
for (unsigned int i = 0; i < StageCycles; ++i) { // the same unit in all the cycles.
assert((cycle < ScoreboardDepth) && "Scoreboard depth exceeded!"); for (unsigned int i = 0; i < IS->getCycles(); ++i) {
assert(((cycle + i) < ScoreboardDepth) &&
"Scoreboard depth exceeded!");
unsigned index = getFutureIndex(cycle); unsigned index = getFutureIndex(cycle + i);
unsigned freeUnits = IS->Units & ~Scoreboard[index]; unsigned freeUnits = IS->getUnits() & ~Scoreboard[index];
// reduce to a single unit // reduce to a single unit
unsigned freeUnit = 0; unsigned freeUnit = 0;
@ -144,10 +140,10 @@ void ExactHazardRecognizer::EmitInstruction(SUnit *SU) {
assert(freeUnit && "No function unit available!"); assert(freeUnit && "No function unit available!");
Scoreboard[index] |= freeUnit; Scoreboard[index] |= freeUnit;
if (IS->Cycles > 0)
++cycle;
} }
// Advance the cycle to the next stage.
cycle += IS->getNextCycles();
} }
DEBUG(dumpScoreboard()); DEBUG(dumpScoreboard());

View File

@ -615,7 +615,7 @@ let isReturn = 1, isTerminator = 1, mayLoad = 1 in
[]>; []>;
// On non-Darwin platforms R9 is callee-saved. // On non-Darwin platforms R9 is callee-saved.
let isCall = 1, Itinerary = IIC_Br, let isCall = 1,
Defs = [R0, R1, R2, R3, R12, LR, Defs = [R0, R1, R2, R3, R12, LR,
D0, D1, D2, D3, D4, D5, D6, D7, D0, D1, D2, D3, D4, D5, D6, D7,
D16, D17, D18, D19, D20, D21, D22, D23, D16, D17, D18, D19, D20, D21, D22, D23,
@ -652,7 +652,7 @@ let isCall = 1, Itinerary = IIC_Br,
} }
// On Darwin R9 is call-clobbered. // On Darwin R9 is call-clobbered.
let isCall = 1, Itinerary = IIC_Br, let isCall = 1,
Defs = [R0, R1, R2, R3, R9, R12, LR, Defs = [R0, R1, R2, R3, R9, R12, LR,
D0, D1, D2, D3, D4, D5, D6, D7, D0, D1, D2, D3, D4, D5, D6, D7,
D16, D17, D18, D19, D20, D21, D22, D23, D16, D17, D18, D19, D20, D21, D22, D23,
@ -685,7 +685,7 @@ let isCall = 1, Itinerary = IIC_Br,
} }
} }
let isBranch = 1, isTerminator = 1, Itinerary = IIC_Br in { let isBranch = 1, isTerminator = 1 in {
// B is "predicable" since it can be xformed into a Bcc. // B is "predicable" since it can be xformed into a Bcc.
let isBarrier = 1 in { let isBarrier = 1 in {
let isPredicable = 1 in let isPredicable = 1 in
@ -1057,7 +1057,7 @@ defm BIC : AsI1_bin_irs<0b1110, "bic",
BinOpFrag<(and node:$LHS, (not node:$RHS))>>; BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
AddrMode1, Size4Bytes, IndexModeNone, DPFrm, NoItinerary, AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iALU,
"bfc", " $dst, $imm", "$src = $dst", "bfc", " $dst, $imm", "$src = $dst",
[(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]>, [(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]>,
Requires<[IsARM, HasV6T2]> { Requires<[IsARM, HasV6T2]> {
@ -1084,16 +1084,16 @@ def : ARMPat<(and GPR:$src, so_imm_not:$imm),
// //
let isCommutable = 1 in let isCommutable = 1 in
def MUL : AsMul1I<0b0000000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, def MUL : AsMul1I<0b0000000, (outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
"mul", " $dst, $a, $b", "mul", " $dst, $a, $b",
[(set GPR:$dst, (mul GPR:$a, GPR:$b))]>; [(set GPR:$dst, (mul GPR:$a, GPR:$b))]>;
def MLA : AsMul1I<0b0000001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), def MLA : AsMul1I<0b0000001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
IIC_iALU, "mla", " $dst, $a, $b, $c", IIC_iMPY, "mla", " $dst, $a, $b, $c",
[(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>; [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>;
def MLS : AMul1I<0b0000011, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), def MLS : AMul1I<0b0000011, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
IIC_iALU, "mls", " $dst, $a, $b, $c", IIC_iMPY, "mls", " $dst, $a, $b, $c",
[(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>, [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>,
Requires<[IsARM, HasV6T2]>; Requires<[IsARM, HasV6T2]>;
@ -1101,32 +1101,32 @@ def MLS : AMul1I<0b0000011, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
let neverHasSideEffects = 1 in { let neverHasSideEffects = 1 in {
let isCommutable = 1 in { let isCommutable = 1 in {
def SMULL : AsMul1I<0b0000110, (outs GPR:$ldst, GPR:$hdst), def SMULL : AsMul1I<0b0000110, (outs GPR:$ldst, GPR:$hdst),
(ins GPR:$a, GPR:$b), IIC_iALU, (ins GPR:$a, GPR:$b), IIC_iMPY,
"smull", " $ldst, $hdst, $a, $b", []>; "smull", " $ldst, $hdst, $a, $b", []>;
def UMULL : AsMul1I<0b0000100, (outs GPR:$ldst, GPR:$hdst), def UMULL : AsMul1I<0b0000100, (outs GPR:$ldst, GPR:$hdst),
(ins GPR:$a, GPR:$b), IIC_iALU, (ins GPR:$a, GPR:$b), IIC_iMPY,
"umull", " $ldst, $hdst, $a, $b", []>; "umull", " $ldst, $hdst, $a, $b", []>;
} }
// Multiply + accumulate // Multiply + accumulate
def SMLAL : AsMul1I<0b0000111, (outs GPR:$ldst, GPR:$hdst), def SMLAL : AsMul1I<0b0000111, (outs GPR:$ldst, GPR:$hdst),
(ins GPR:$a, GPR:$b), IIC_iALU, (ins GPR:$a, GPR:$b), IIC_iMPY,
"smlal", " $ldst, $hdst, $a, $b", []>; "smlal", " $ldst, $hdst, $a, $b", []>;
def UMLAL : AsMul1I<0b0000101, (outs GPR:$ldst, GPR:$hdst), def UMLAL : AsMul1I<0b0000101, (outs GPR:$ldst, GPR:$hdst),
(ins GPR:$a, GPR:$b), IIC_iALU, (ins GPR:$a, GPR:$b), IIC_iMPY,
"umlal", " $ldst, $hdst, $a, $b", []>; "umlal", " $ldst, $hdst, $a, $b", []>;
def UMAAL : AMul1I <0b0000010, (outs GPR:$ldst, GPR:$hdst), def UMAAL : AMul1I <0b0000010, (outs GPR:$ldst, GPR:$hdst),
(ins GPR:$a, GPR:$b), IIC_iALU, (ins GPR:$a, GPR:$b), IIC_iMPY,
"umaal", " $ldst, $hdst, $a, $b", []>, "umaal", " $ldst, $hdst, $a, $b", []>,
Requires<[IsARM, HasV6]>; Requires<[IsARM, HasV6]>;
} // neverHasSideEffects } // neverHasSideEffects
// Most significant word multiply // Most significant word multiply
def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b), def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
IIC_iALU, "smmul", " $dst, $a, $b", IIC_iMPY, "smmul", " $dst, $a, $b",
[(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>, [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>,
Requires<[IsARM, HasV6]> { Requires<[IsARM, HasV6]> {
let Inst{7-4} = 0b0001; let Inst{7-4} = 0b0001;
@ -1134,7 +1134,7 @@ def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
} }
def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
IIC_iALU, "smmla", " $dst, $a, $b, $c", IIC_iMPY, "smmla", " $dst, $a, $b, $c",
[(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>, [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>,
Requires<[IsARM, HasV6]> { Requires<[IsARM, HasV6]> {
let Inst{7-4} = 0b0001; let Inst{7-4} = 0b0001;
@ -1142,7 +1142,7 @@ def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
IIC_iALU, "smmls", " $dst, $a, $b, $c", IIC_iMPY, "smmls", " $dst, $a, $b, $c",
[(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>, [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>,
Requires<[IsARM, HasV6]> { Requires<[IsARM, HasV6]> {
let Inst{7-4} = 0b1101; let Inst{7-4} = 0b1101;
@ -1150,7 +1150,7 @@ def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
multiclass AI_smul<string opc, PatFrag opnode> { multiclass AI_smul<string opc, PatFrag opnode> {
def BB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), def BB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
IIC_iALU, !strconcat(opc, "bb"), " $dst, $a, $b", IIC_iMPY, !strconcat(opc, "bb"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
(sext_inreg GPR:$b, i16)))]>, (sext_inreg GPR:$b, i16)))]>,
Requires<[IsARM, HasV5TE]> { Requires<[IsARM, HasV5TE]> {
@ -1159,7 +1159,7 @@ multiclass AI_smul<string opc, PatFrag opnode> {
} }
def BT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), def BT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
IIC_iALU, !strconcat(opc, "bt"), " $dst, $a, $b", IIC_iMPY, !strconcat(opc, "bt"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
(sra GPR:$b, (i32 16))))]>, (sra GPR:$b, (i32 16))))]>,
Requires<[IsARM, HasV5TE]> { Requires<[IsARM, HasV5TE]> {
@ -1168,7 +1168,7 @@ multiclass AI_smul<string opc, PatFrag opnode> {
} }
def TB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), def TB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
IIC_iALU, !strconcat(opc, "tb"), " $dst, $a, $b", IIC_iMPY, !strconcat(opc, "tb"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
(sext_inreg GPR:$b, i16)))]>, (sext_inreg GPR:$b, i16)))]>,
Requires<[IsARM, HasV5TE]> { Requires<[IsARM, HasV5TE]> {
@ -1177,7 +1177,7 @@ multiclass AI_smul<string opc, PatFrag opnode> {
} }
def TT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), def TT : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
IIC_iALU, !strconcat(opc, "tt"), " $dst, $a, $b", IIC_iMPY, !strconcat(opc, "tt"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
(sra GPR:$b, (i32 16))))]>, (sra GPR:$b, (i32 16))))]>,
Requires<[IsARM, HasV5TE]> { Requires<[IsARM, HasV5TE]> {
@ -1186,7 +1186,7 @@ multiclass AI_smul<string opc, PatFrag opnode> {
} }
def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b), def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
IIC_iALU, !strconcat(opc, "wb"), " $dst, $a, $b", IIC_iMPY, !strconcat(opc, "wb"), " $dst, $a, $b",
[(set GPR:$dst, (sra (opnode GPR:$a, [(set GPR:$dst, (sra (opnode GPR:$a,
(sext_inreg GPR:$b, i16)), (i32 16)))]>, (sext_inreg GPR:$b, i16)), (i32 16)))]>,
Requires<[IsARM, HasV5TE]> { Requires<[IsARM, HasV5TE]> {
@ -1195,7 +1195,7 @@ multiclass AI_smul<string opc, PatFrag opnode> {
} }
def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b), def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
IIC_iALU, !strconcat(opc, "wt"), " $dst, $a, $b", IIC_iMPY, !strconcat(opc, "wt"), " $dst, $a, $b",
[(set GPR:$dst, (sra (opnode GPR:$a, [(set GPR:$dst, (sra (opnode GPR:$a,
(sra GPR:$b, (i32 16))), (i32 16)))]>, (sra GPR:$b, (i32 16))), (i32 16)))]>,
Requires<[IsARM, HasV5TE]> { Requires<[IsARM, HasV5TE]> {
@ -1207,7 +1207,7 @@ multiclass AI_smul<string opc, PatFrag opnode> {
multiclass AI_smla<string opc, PatFrag opnode> { multiclass AI_smla<string opc, PatFrag opnode> {
def BB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), def BB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
IIC_iALU, !strconcat(opc, "bb"), " $dst, $a, $b, $acc", IIC_iMPY, !strconcat(opc, "bb"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, [(set GPR:$dst, (add GPR:$acc,
(opnode (sext_inreg GPR:$a, i16), (opnode (sext_inreg GPR:$a, i16),
(sext_inreg GPR:$b, i16))))]>, (sext_inreg GPR:$b, i16))))]>,
@ -1217,7 +1217,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
} }
def BT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), def BT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
IIC_iALU, !strconcat(opc, "bt"), " $dst, $a, $b, $acc", IIC_iMPY, !strconcat(opc, "bt"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16),
(sra GPR:$b, (i32 16)))))]>, (sra GPR:$b, (i32 16)))))]>,
Requires<[IsARM, HasV5TE]> { Requires<[IsARM, HasV5TE]> {
@ -1226,7 +1226,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
} }
def TB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), def TB : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
IIC_iALU, !strconcat(opc, "tb"), " $dst, $a, $b, $acc", IIC_iMPY, !strconcat(opc, "tb"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
(sext_inreg GPR:$b, i16))))]>, (sext_inreg GPR:$b, i16))))]>,
Requires<[IsARM, HasV5TE]> { Requires<[IsARM, HasV5TE]> {
@ -1235,7 +1235,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
} }
def TT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), def TT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
IIC_iALU, !strconcat(opc, "tt"), " $dst, $a, $b, $acc", IIC_iMPY, !strconcat(opc, "tt"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
(sra GPR:$b, (i32 16)))))]>, (sra GPR:$b, (i32 16)))))]>,
Requires<[IsARM, HasV5TE]> { Requires<[IsARM, HasV5TE]> {
@ -1244,7 +1244,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
} }
def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), def WB : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
IIC_iALU, !strconcat(opc, "wb"), " $dst, $a, $b, $acc", IIC_iMPY, !strconcat(opc, "wb"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
(sext_inreg GPR:$b, i16)), (i32 16))))]>, (sext_inreg GPR:$b, i16)), (i32 16))))]>,
Requires<[IsARM, HasV5TE]> { Requires<[IsARM, HasV5TE]> {
@ -1253,7 +1253,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
} }
def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), def WT : AMulxyI<0b0001001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
IIC_iALU, !strconcat(opc, "wt"), " $dst, $a, $b, $acc", IIC_iMPY, !strconcat(opc, "wt"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
(sra GPR:$b, (i32 16))), (i32 16))))]>, (sra GPR:$b, (i32 16))), (i32 16))))]>,
Requires<[IsARM, HasV5TE]> { Requires<[IsARM, HasV5TE]> {

View File

@ -508,7 +508,7 @@ def tMOVgpr2gpr : T1I<(outs GPR:$dst), (ins GPR:$src), IIC_iALU,
// multiply register // multiply register
let isCommutable = 1 in let isCommutable = 1 in
def tMUL : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALU, def tMUL : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMPY,
"mul", " $dst, $rhs", "mul", " $dst, $rhs",
[(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>; [(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>;

View File

@ -808,80 +808,80 @@ def : T2Pat<(t2_so_imm_not:$src),
// Multiply Instructions. // Multiply Instructions.
// //
let isCommutable = 1 in let isCommutable = 1 in
def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
"mul", " $dst, $a, $b", "mul", " $dst, $a, $b",
[(set GPR:$dst, (mul GPR:$a, GPR:$b))]>; [(set GPR:$dst, (mul GPR:$a, GPR:$b))]>;
def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iALU, def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMPY,
"mla", " $dst, $a, $b, $c", "mla", " $dst, $a, $b, $c",
[(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>; [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>;
def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iALU, def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMPY,
"mls", " $dst, $a, $b, $c", "mls", " $dst, $a, $b, $c",
[(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>; [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>;
// Extra precision multiplies with low / high results // Extra precision multiplies with low / high results
let neverHasSideEffects = 1 in { let neverHasSideEffects = 1 in {
let isCommutable = 1 in { let isCommutable = 1 in {
def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU, def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY,
"smull", " $ldst, $hdst, $a, $b", []>; "smull", " $ldst, $hdst, $a, $b", []>;
def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU, def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY,
"umull", " $ldst, $hdst, $a, $b", []>; "umull", " $ldst, $hdst, $a, $b", []>;
} }
// Multiply + accumulate // Multiply + accumulate
def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU, def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY,
"smlal", " $ldst, $hdst, $a, $b", []>; "smlal", " $ldst, $hdst, $a, $b", []>;
def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU, def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY,
"umlal", " $ldst, $hdst, $a, $b", []>; "umlal", " $ldst, $hdst, $a, $b", []>;
def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iALU, def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMPY,
"umaal", " $ldst, $hdst, $a, $b", []>; "umaal", " $ldst, $hdst, $a, $b", []>;
} // neverHasSideEffects } // neverHasSideEffects
// Most significant word multiply // Most significant word multiply
def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
"smmul", " $dst, $a, $b", "smmul", " $dst, $a, $b",
[(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>; [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]>;
def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iALU, def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMPY,
"smmla", " $dst, $a, $b, $c", "smmla", " $dst, $a, $b, $c",
[(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>; [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>;
def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iALU, def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMPY,
"smmls", " $dst, $a, $b, $c", "smmls", " $dst, $a, $b, $c",
[(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>; [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]>;
multiclass T2I_smul<string opc, PatFrag opnode> { multiclass T2I_smul<string opc, PatFrag opnode> {
def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
!strconcat(opc, "bb"), " $dst, $a, $b", !strconcat(opc, "bb"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
(sext_inreg GPR:$b, i16)))]>; (sext_inreg GPR:$b, i16)))]>;
def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
!strconcat(opc, "bt"), " $dst, $a, $b", !strconcat(opc, "bt"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16), [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
(sra GPR:$b, (i32 16))))]>; (sra GPR:$b, (i32 16))))]>;
def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
!strconcat(opc, "tb"), " $dst, $a, $b", !strconcat(opc, "tb"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
(sext_inreg GPR:$b, i16)))]>; (sext_inreg GPR:$b, i16)))]>;
def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
!strconcat(opc, "tt"), " $dst, $a, $b", !strconcat(opc, "tt"), " $dst, $a, $b",
[(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)), [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
(sra GPR:$b, (i32 16))))]>; (sra GPR:$b, (i32 16))))]>;
def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
!strconcat(opc, "wb"), " $dst, $a, $b", !strconcat(opc, "wb"), " $dst, $a, $b",
[(set GPR:$dst, (sra (opnode GPR:$a, [(set GPR:$dst, (sra (opnode GPR:$a,
(sext_inreg GPR:$b, i16)), (i32 16)))]>; (sext_inreg GPR:$b, i16)), (i32 16)))]>;
def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALU, def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMPY,
!strconcat(opc, "wt"), " $dst, $a, $b", !strconcat(opc, "wt"), " $dst, $a, $b",
[(set GPR:$dst, (sra (opnode GPR:$a, [(set GPR:$dst, (sra (opnode GPR:$a,
(sra GPR:$b, (i32 16))), (i32 16)))]>; (sra GPR:$b, (i32 16))), (i32 16)))]>;
@ -889,33 +889,33 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
multiclass T2I_smla<string opc, PatFrag opnode> { multiclass T2I_smla<string opc, PatFrag opnode> {
def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU, def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY,
!strconcat(opc, "bb"), " $dst, $a, $b, $acc", !strconcat(opc, "bb"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, [(set GPR:$dst, (add GPR:$acc,
(opnode (sext_inreg GPR:$a, i16), (opnode (sext_inreg GPR:$a, i16),
(sext_inreg GPR:$b, i16))))]>; (sext_inreg GPR:$b, i16))))]>;
def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU, def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY,
!strconcat(opc, "bt"), " $dst, $a, $b, $acc", !strconcat(opc, "bt"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16), [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16),
(sra GPR:$b, (i32 16)))))]>; (sra GPR:$b, (i32 16)))))]>;
def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU, def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY,
!strconcat(opc, "tb"), " $dst, $a, $b, $acc", !strconcat(opc, "tb"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
(sext_inreg GPR:$b, i16))))]>; (sext_inreg GPR:$b, i16))))]>;
def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU, def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY,
!strconcat(opc, "tt"), " $dst, $a, $b, $acc", !strconcat(opc, "tt"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)), [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
(sra GPR:$b, (i32 16)))))]>; (sra GPR:$b, (i32 16)))))]>;
def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU, def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY,
!strconcat(opc, "wb"), " $dst, $a, $b, $acc", !strconcat(opc, "wb"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
(sext_inreg GPR:$b, i16)), (i32 16))))]>; (sext_inreg GPR:$b, i16)), (i32 16))))]>;
def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iALU, def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMPY,
!strconcat(opc, "wt"), " $dst, $a, $b, $acc", !strconcat(opc, "wt"), " $dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a, [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
(sra GPR:$b, (i32 16))), (i32 16))))]>; (sra GPR:$b, (i32 16))), (i32 16))))]>;

View File

@ -16,34 +16,34 @@ def CortexA8Itineraries : ProcessorItineraries<[
// two fully-pipelined integer ALU pipelines // two fully-pipelined integer ALU pipelines
InstrItinData<IIC_iALU , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>, InstrItinData<IIC_iALU , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
// one fully-pipelined integer Multiply pipeline // one fully-pipelined integer Multiply pipeline
// function units are used in alpha order, so use FU_Pipe1 // function units are reserved by the scheduler in reverse alpha order,
// for the Multiply pipeline // so use FU_Pipe0 for the Multiply pipeline
InstrItinData<IIC_iMPY , [InstrStage<1, [FU_Pipe1]>]>, InstrItinData<IIC_iMPY , [InstrStage<1, [FU_Pipe0]>]>,
// loads have an extra cycle of latency, but are fully pipelined // loads have an extra cycle of latency, but are fully pipelined
// use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit // use FU_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_iLoad , [InstrStage<0, [FU_Issue]>, InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0]>]>, InstrStage<1, [FU_LdSt0]>]>,
// fully-pipelined stores // fully-pipelined stores
// use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit // use FU_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_iStore , [InstrStage<0, [FU_Issue]>, InstrItinData<IIC_iStore , [InstrStage<1, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>, InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
// no delay slots, so the latency of a branch is unimportant // no delay slots, so the latency of a branch is unimportant
InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>, InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
// VFP ALU is not pipelined so stall all issues // NFP ALU is not pipelined so stall all issues
// FIXME assume NFP pipeline and 7 cycle non-pipelined latency InstrItinData<IIC_fpALU , [InstrStage<7, [FU_Pipe0], 0>,
InstrItinData<IIC_fpALU , [InstrStage<7, [FU_Pipe0, FU_Pipe1]>]>, InstrStage<7, [FU_Pipe1], 0>]>,
// VFP MPY is not pipelined so stall all issues // VFP MPY is not pipelined so stall all issues
// FIXME assume NFP pipeline and 7 cycle non-pipelined latency InstrItinData<IIC_fpMPY , [InstrStage<7, [FU_Pipe0], 0>,
InstrItinData<IIC_fpMPY , [InstrStage<7, [FU_Pipe0, FU_Pipe1]>]>, InstrStage<7, [FU_Pipe1], 0>]>,
// loads have an extra cycle of latency, but are fully pipelined // loads have an extra cycle of latency, but are fully pipelined
// use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit // use FU_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpLoad , [InstrStage<0, [FU_Issue]>, InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
InstrStage<1, [FU_LdSt0]>]>, InstrStage<1, [FU_LdSt0]>]>,
// use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit // use FU_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpStore , [InstrStage<0, [FU_Issue]>, InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Issue], 0>,
InstrStage<1, [FU_Pipe0, FU_Pipe1]>]> InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>
]>; ]>;

View File

@ -215,7 +215,7 @@ void SubtargetEmitter::FormItineraryString(Record *ItinData,
// Next stage // Next stage
const Record *Stage = StageList[i]; const Record *Stage = StageList[i];
// Form string as ,{ cycles, u1 | u2 | ... | un } // Form string as ,{ cycles, u1 | u2 | ... | un, timeinc }
int Cycles = Stage->getValueAsInt("Cycles"); int Cycles = Stage->getValueAsInt("Cycles");
ItinString += " { " + itostr(Cycles) + ", "; ItinString += " { " + itostr(Cycles) + ", ";
@ -229,6 +229,9 @@ void SubtargetEmitter::FormItineraryString(Record *ItinData,
if (++j < M) ItinString += " | "; if (++j < M) ItinString += " | ";
} }
int TimeInc = Stage->getValueAsInt("TimeInc");
ItinString += ", " + itostr(TimeInc);
// Close off stage // Close off stage
ItinString += " }"; ItinString += " }";
if (++i < N) ItinString += ", "; if (++i < N) ItinString += ", ";
@ -252,7 +255,7 @@ void SubtargetEmitter::EmitStageData(raw_ostream &OS,
// Begin stages table // Begin stages table
OS << "static const llvm::InstrStage Stages[] = {\n" OS << "static const llvm::InstrStage Stages[] = {\n"
" { 0, 0 }, // No itinerary\n"; " { 0, 0, 0 }, // No itinerary\n";
unsigned StageCount = 1; unsigned StageCount = 1;
unsigned ItinEnum = 1; unsigned ItinEnum = 1;
@ -289,7 +292,7 @@ void SubtargetEmitter::EmitStageData(raw_ostream &OS,
// If new itinerary // If new itinerary
if (Find == 0) { if (Find == 0) {
// Emit as { cycles, u1 | u2 | ... | un }, // index // Emit as { cycles, u1 | u2 | ... | un, timeinc }, // index
OS << ItinString << ", // " << ItinEnum << "\n"; OS << ItinString << ", // " << ItinEnum << "\n";
// Record Itin class number. // Record Itin class number.
ItinMap[ItinString] = Find = StageCount; ItinMap[ItinString] = Find = StageCount;
@ -313,7 +316,7 @@ void SubtargetEmitter::EmitStageData(raw_ostream &OS,
} }
// Closing stage // Closing stage
OS << " { 0, 0 } // End itinerary\n"; OS << " { 0, 0, 0 } // End itinerary\n";
// End stages table // End stages table
OS << "};\n"; OS << "};\n";