Fix preload instruction isel. Only v7 supports pli, and only v7 with mp extension supports pldw. Add subtarget attribute to denote mp extension support and legalize illegal ones to nothing.

llvm-svn: 118160
This commit is contained in:
Evan Cheng 2010-11-03 06:34:55 +00:00
parent 3ab17ad8ec
commit 8740ee3637
12 changed files with 103 additions and 56 deletions

View File

@ -183,18 +183,18 @@ def SDTVecInsert : SDTypeProfile<1, 3, [ // vector insert
SDTCisEltOfVec<2, 1>, SDTCisSameAs<0, 1>, SDTCisPtrTy<3>
]>;
def STDPrefetch : SDTypeProfile<0, 3, [ // prefetch
def SDTPrefetch : SDTypeProfile<0, 3, [ // prefetch
SDTCisPtrTy<0>, SDTCisSameAs<1, 2>, SDTCisInt<1>
]>;
def STDMemBarrier : SDTypeProfile<0, 5, [ // memory barier
def SDTMemBarrier : SDTypeProfile<0, 5, [ // memory barier
SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisSameAs<0,3>, SDTCisSameAs<0,4>,
SDTCisInt<0>
]>;
def STDAtomic3 : SDTypeProfile<1, 3, [
def SDTAtomic3 : SDTypeProfile<1, 3, [
SDTCisSameAs<0,2>, SDTCisSameAs<0,3>, SDTCisInt<0>, SDTCisPtrTy<1>
]>;
def STDAtomic2 : SDTypeProfile<1, 2, [
def SDTAtomic2 : SDTypeProfile<1, 2, [
SDTCisSameAs<0,2>, SDTCisInt<0>, SDTCisPtrTy<1>
]>;
@ -374,35 +374,35 @@ def br : SDNode<"ISD::BR" , SDTBr, [SDNPHasChain]>;
def trap : SDNode<"ISD::TRAP" , SDTNone,
[SDNPHasChain, SDNPSideEffect]>;
def prefetch : SDNode<"ISD::PREFETCH" , STDPrefetch,
def prefetch : SDNode<"ISD::PREFETCH" , SDTPrefetch,
[SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
def membarrier : SDNode<"ISD::MEMBARRIER" , STDMemBarrier,
def membarrier : SDNode<"ISD::MEMBARRIER" , SDTMemBarrier,
[SDNPHasChain, SDNPSideEffect]>;
def atomic_cmp_swap : SDNode<"ISD::ATOMIC_CMP_SWAP" , STDAtomic3,
def atomic_cmp_swap : SDNode<"ISD::ATOMIC_CMP_SWAP" , SDTAtomic3,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
def atomic_load_add : SDNode<"ISD::ATOMIC_LOAD_ADD" , STDAtomic2,
def atomic_load_add : SDNode<"ISD::ATOMIC_LOAD_ADD" , SDTAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
def atomic_swap : SDNode<"ISD::ATOMIC_SWAP", STDAtomic2,
def atomic_swap : SDNode<"ISD::ATOMIC_SWAP", SDTAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
def atomic_load_sub : SDNode<"ISD::ATOMIC_LOAD_SUB" , STDAtomic2,
def atomic_load_sub : SDNode<"ISD::ATOMIC_LOAD_SUB" , SDTAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
def atomic_load_and : SDNode<"ISD::ATOMIC_LOAD_AND" , STDAtomic2,
def atomic_load_and : SDNode<"ISD::ATOMIC_LOAD_AND" , SDTAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
def atomic_load_or : SDNode<"ISD::ATOMIC_LOAD_OR" , STDAtomic2,
def atomic_load_or : SDNode<"ISD::ATOMIC_LOAD_OR" , SDTAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
def atomic_load_xor : SDNode<"ISD::ATOMIC_LOAD_XOR" , STDAtomic2,
def atomic_load_xor : SDNode<"ISD::ATOMIC_LOAD_XOR" , SDTAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
def atomic_load_nand: SDNode<"ISD::ATOMIC_LOAD_NAND", STDAtomic2,
def atomic_load_nand: SDNode<"ISD::ATOMIC_LOAD_NAND", SDTAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
def atomic_load_min : SDNode<"ISD::ATOMIC_LOAD_MIN", STDAtomic2,
def atomic_load_min : SDNode<"ISD::ATOMIC_LOAD_MIN", SDTAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
def atomic_load_max : SDNode<"ISD::ATOMIC_LOAD_MAX", STDAtomic2,
def atomic_load_max : SDNode<"ISD::ATOMIC_LOAD_MAX", SDTAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
def atomic_load_umin : SDNode<"ISD::ATOMIC_LOAD_UMIN", STDAtomic2,
def atomic_load_umin : SDNode<"ISD::ATOMIC_LOAD_UMIN", SDTAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
def atomic_load_umax : SDNode<"ISD::ATOMIC_LOAD_UMAX", STDAtomic2,
def atomic_load_umax : SDNode<"ISD::ATOMIC_LOAD_UMAX", SDTAtomic2,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
// Do not use ld, st directly. Use load, extload, sextload, zextload, store,

View File

@ -64,6 +64,9 @@ def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
"Prefer 32-bit Thumb instrs">;
// Multiprocessing extension.
def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
"Supports Multiprocessing extension">;
// ARM architectures.
def ArchV4T : SubtargetFeature<"v4t", "ARMArchVersion", "V4T",

View File

@ -598,10 +598,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Expand);
setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand);
// ARM v5TE+ and Thumb2 has preload instructions.
if (Subtarget->isThumb2() ||
(!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps()))
setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
// Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
if (!Subtarget->hasV6Ops()) {
@ -777,6 +774,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER";
case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
case ARMISD::VCEQ: return "ARMISD::VCEQ";
case ARMISD::VCGE: return "ARMISD::VCGE";
case ARMISD::VCGEU: return "ARMISD::VCGEU";
@ -2060,6 +2059,31 @@ static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
DAG.getConstant(DMBOpt, MVT::i32));
}
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
// ARM pre v5TE and Thumb1 does not have preload instructions.
if (!(Subtarget->isThumb2() ||
(!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
// Just preserve the chain.
return Op.getOperand(0);
DebugLoc dl = Op.getDebugLoc();
unsigned Flavor = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
if (Flavor != 3) {
if (!Subtarget->hasV7Ops())
return Op.getOperand(0);
else if (Flavor == 2 && !Subtarget->hasMPExtension())
return Op.getOperand(0);
}
if (Subtarget->isThumb())
// Invert the bits.
Flavor = ~Flavor & 0x3;
return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
Op.getOperand(1), DAG.getConstant(Flavor, MVT::i32));
}
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
@ -3842,6 +3866,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget);
case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
case ISD::FP_TO_SINT:

View File

@ -84,6 +84,8 @@ namespace llvm {
MEMBARRIER, // Memory barrier (DMB)
MEMBARRIER_MCR, // Memory barrier (MCR)
PRELOAD, // Preload
VCEQ, // Vector compare equal.
VCGE, // Vector compare greater than or equal.
VCGEU, // Vector compare unsigned greater than or equal.

View File

@ -62,6 +62,8 @@ def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_ARMPRELOAD : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
@ -130,6 +132,8 @@ def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER,
[SDNPHasChain]>;
def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER,
[SDNPHasChain]>;
def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPRELOAD,
[SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
@ -159,6 +163,8 @@ def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">,
AssemblerPredicate;
def HasDB : Predicate<"Subtarget->hasDataBarrier()">,
AssemblerPredicate;
def HasMP : Predicate<"Subtarget->hasMPExtension()">,
AssemblerPredicate;
def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
def IsThumb : Predicate<"Subtarget->isThumb()">, AssemblerPredicate;
@ -988,14 +994,11 @@ def CPS : AXI<(outs), (ins cps_opt:$opt), MiscFrm, NoItinerary, "cps$opt",
// Preload signals the memory system of possible future data/instruction access.
// These are for disassembly only.
//
// A8.6.117, A8.6.118. Different instructions are generated for #0 and #-0.
// The neg_zero operand translates -0 to -1, -1 to -2, ..., etc.
multiclass APreLoad<bits<2> data_read, string opc> {
def i12 : AXI<(outs), (ins addrmode_imm12:$addr), MiscFrm, NoItinerary,
def i12 : AXI<(outs), (ins addrmode_imm12:$addr), MiscFrm, IIC_Preload,
!strconcat(opc, "\t$addr"),
[(prefetch addrmode_imm12:$addr, imm, (i32 data_read))]> {
[(ARMPreload addrmode_imm12:$addr, (i32 data_read))]> {
bits<4> Rt;
bits<17> addr;
let Inst{31-26} = 0b111101;
@ -1009,9 +1012,9 @@ multiclass APreLoad<bits<2> data_read, string opc> {
let Inst{11-0} = addr{11-0}; // imm12
}
def rs : AXI<(outs), (ins ldst_so_reg:$shift), MiscFrm, NoItinerary,
def rs : AXI<(outs), (ins ldst_so_reg:$shift), MiscFrm, IIC_Preload,
!strconcat(opc, "\t$shift"),
[(prefetch ldst_so_reg:$shift, imm, (i32 data_read))]> {
[(ARMPreload ldst_so_reg:$shift, (i32 data_read))]> {
bits<4> Rt;
bits<17> shift;
let Inst{31-26} = 0b111101;
@ -1025,9 +1028,9 @@ multiclass APreLoad<bits<2> data_read, string opc> {
}
}
defm PLD : APreLoad<3, "pld">;
defm PLDW : APreLoad<2, "pldw">;
defm PLI : APreLoad<1, "pli">;
defm PLD : APreLoad<3, "pld">, Requires<[IsARM]>;
defm PLDW : APreLoad<2, "pldw">, Requires<[IsARM,HasV7,HasMP]>;
defm PLI : APreLoad<1, "pli">, Requires<[IsARM,HasV7]>;
def SETEND : AXI<(outs),(ins setend_op:$end), MiscFrm, NoItinerary,
"setend\t$end",

View File

@ -1171,67 +1171,66 @@ def t2STRD_POST : T2Ii8s4<0, 1, 0, (outs),
// T2Ipl (Preload Data/Instruction) signals the memory system of possible future
// data/instruction access. These are for disassembly only.
//
// A8.6.117, A8.6.118. Different instructions are generated for #0 and #-0.
// The neg_zero operand translates -0 to -1, -1 to -2, ..., etc.
multiclass T2Ipl<bit instr, bit write, bits<2> data_read, string opc> {
// instr_write is inverted for Thumb mode: (prefetch 3) -> (preload 0),
// (prefetch 1) -> (preload 2), (prefetch 2) -> (preload 1).
multiclass T2Ipl<bits<2> instr_write, string opc> {
def i12 : T2Ii12<(outs), (ins t2addrmode_imm12:$addr), IIC_iLoad_i, opc,
def i12 : T2Ii12<(outs), (ins t2addrmode_imm12:$addr), IIC_Preload, opc,
"\t$addr",
[(prefetch t2addrmode_imm12:$addr, imm, (i32 data_read))]> {
[(ARMPreload t2addrmode_imm12:$addr, (i32 instr_write))]> {
let Inst{31-25} = 0b1111100;
let Inst{24} = instr;
let Inst{24} = instr_write{1};
let Inst{23} = 1; // U = 1
let Inst{22} = 0;
let Inst{21} = write;
let Inst{21} = instr_write{0};
let Inst{20} = 1;
let Inst{15-12} = 0b1111;
}
def i8 : T2Ii8<(outs), (ins t2addrmode_imm8:$addr), IIC_iLoad_i, opc,
def i8 : T2Ii8<(outs), (ins t2addrmode_imm8:$addr), IIC_Preload, opc,
"\t$addr",
[(prefetch t2addrmode_imm8:$addr, imm, (i32 data_read))]> {
[(ARMPreload t2addrmode_imm8:$addr, (i32 instr_write))]> {
let Inst{31-25} = 0b1111100;
let Inst{24} = instr;
let Inst{24} = instr_write{1};
let Inst{23} = 0; // U = 0
let Inst{22} = 0;
let Inst{21} = write;
let Inst{21} = instr_write{0};
let Inst{20} = 1;
let Inst{15-12} = 0b1111;
let Inst{11-8} = 0b1100;
}
def s : T2Iso<(outs), (ins t2addrmode_so_reg:$addr), IIC_iLoad_i, opc,
def s : T2Iso<(outs), (ins t2addrmode_so_reg:$addr), IIC_Preload, opc,
"\t$addr",
[(prefetch t2addrmode_so_reg:$addr, imm, (i32 data_read))]> {
[(ARMPreload t2addrmode_so_reg:$addr, (i32 instr_write))]> {
let Inst{31-25} = 0b1111100;
let Inst{24} = instr;
let Inst{24} = instr_write{1};
let Inst{23} = 0; // add = TRUE for T1
let Inst{22} = 0;
let Inst{21} = write;
let Inst{21} = instr_write{0};
let Inst{20} = 1;
let Inst{15-12} = 0b1111;
let Inst{11-6} = 0000000;
}
let isCodeGenOnly = 1 in
def pci : T2Ipc<(outs), (ins i32imm:$addr), IIC_iLoad_i, opc,
def pci : T2Ipc<(outs), (ins i32imm:$addr), IIC_Preload, opc,
"\t$addr",
[]> {
let Inst{31-25} = 0b1111100;
let Inst{24} = instr;
let Inst{24} = instr_write{1};
let Inst{23} = ?; // add = (U == 1)
let Inst{22} = 0;
let Inst{21} = write;
let Inst{21} = instr_write{0};
let Inst{20} = 1;
let Inst{19-16} = 0b1111; // Rn = 0b1111
let Inst{15-12} = 0b1111;
}
}
defm t2PLD : T2Ipl<0, 0, 3, "pld">;
defm t2PLDW : T2Ipl<0, 1, 2, "pldw">;
defm t2PLI : T2Ipl<1, 0, 1, "pli">;
defm t2PLD : T2Ipl<0, "pld">, Requires<[IsThumb2]>;
defm t2PLDW : T2Ipl<1, "pldw">, Requires<[IsThumb2,HasV7,HasMP]>;
defm t2PLI : T2Ipl<2, "pli">, Requires<[IsThumb2,HasV7]>;
//===----------------------------------------------------------------------===//
// Load / store multiple Instructions.

View File

@ -90,6 +90,7 @@ def IIC_iStore_d_r : InstrItinClass;
def IIC_iStore_d_ru : InstrItinClass;
def IIC_iStore_m : InstrItinClass<0>; // micro-coded
def IIC_iStore_mu : InstrItinClass<0>; // micro-coded
def IIC_Preload : InstrItinClass;
def IIC_Br : InstrItinClass;
def IIC_fpSTAT : InstrItinClass;
def IIC_fpUNA32 : InstrItinClass;

View File

@ -225,6 +225,10 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrItinData<IIC_iStore_mu, [InstrStage<2, [A8_Pipe0], 0>,
InstrStage<2, [A8_LSPipe]>], [2]>,
//
// Preload
InstrItinData<IIC_Preload, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
// Branch
//
// no delay slots, so the latency of a branch is unimportant

View File

@ -402,6 +402,10 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<1, [A9_AGU], 0>,
InstrStage<2, [A9_LSUnit]>], [2]>,
//
// Preload
InstrItinData<IIC_Preload, [InstrStage<1, [A9_Issue0, A9_Issue1]>], [1, 1]>,
// Branch
//
// no delay slots, so the latency of a branch is unimportant

View File

@ -51,6 +51,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
, HasT2ExtractPack(false)
, HasDataBarrier(false)
, Pref32BitThumb(false)
, HasMPExtension(false)
, FPOnlySP(false)
, AllowsUnalignedMem(false)
, stackAlignment(4)

View File

@ -106,6 +106,10 @@ protected:
/// over 16-bit ones.
bool Pref32BitThumb;
/// HasMPExtension - True if the subtarget supports Multiprocessing
/// extension (ARMv7 only).
bool HasMPExtension;
/// FPOnlySP - If true, the floating point unit only supports single
/// precision.
bool FPOnlySP;
@ -176,6 +180,7 @@ protected:
bool isFPBrccSlow() const { return SlowFPBrcc; }
bool isFPOnlySP() const { return FPOnlySP; }
bool prefers32BitThumb() const { return Pref32BitThumb; }
bool hasMPExtension() const { return HasMPExtension; }
bool hasFP16() const { return HasFP16; }
bool hasD16() const { return HasD16; }

View File

@ -1,6 +1,6 @@
; RUN: llc < %s -march=thumb -mattr=-thumb2 | not grep pld
; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s -check-prefix=THUMB2
; RUN: llc < %s -march=arm -mattr=+v5te | FileCheck %s -check-prefix=ARM
; RUN: llc < %s -march=thumb -mattr=+v7a,+mp | FileCheck %s -check-prefix=THUMB2
; RUN: llc < %s -march=arm -mattr=+v7a,+mp | FileCheck %s -check-prefix=ARM
; rdar://8601536
define void @t1(i8* %ptr) nounwind {