From 2ad0b373186c19235a5de1e32067e34d5ed6a52a Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Thu, 7 Apr 2011 19:54:57 +0000 Subject: [PATCH] Added a check in the preRA scheduler for potential interference on a induction variable. The preRA scheduler is unaware of induction vars, so we look for potential "virtual register cycles" instead. Fixes Bad scheduling prevents coalescing llvm-svn: 129100 --- llvm/include/llvm/CodeGen/ScheduleDAG.h | 13 ++-- .../SelectionDAG/ScheduleDAGRRList.cpp | 59 +++++++++++++++++-- .../SelectionDAG/ScheduleDAGSDNodes.cpp | 46 +++++++++++++++ .../CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 6 ++ llvm/test/CodeGen/ARM/2011-04-07-schediv.ll | 31 ++++++++++ 5 files changed, 145 insertions(+), 10 deletions(-) create mode 100644 llvm/test/CodeGen/ARM/2011-04-07-schediv.ll diff --git a/llvm/include/llvm/CodeGen/ScheduleDAG.h b/llvm/include/llvm/CodeGen/ScheduleDAG.h index a303dbbec0fd..9a2345b368df 100644 --- a/llvm/include/llvm/CodeGen/ScheduleDAG.h +++ b/llvm/include/llvm/CodeGen/ScheduleDAG.h @@ -250,6 +250,7 @@ namespace llvm { unsigned NumSuccsLeft; // # of succs not scheduled. unsigned short NumRegDefsLeft; // # of reg defs with no scheduled use. unsigned short Latency; // Node latency. + bool isVRegCycle : 1; // May use and def the same vreg. bool isCall : 1; // Is a function call. bool isTwoAddress : 1; // Is a two-address instruction. bool isCommutable : 1; // Is a commutable instruction. @@ -278,8 +279,8 @@ namespace llvm { : Node(node), Instr(0), OrigNode(0), NodeNum(nodenum), NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0), NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0), - isCall(false), isTwoAddress(false), isCommutable(false), - hasPhysRegDefs(false), hasPhysRegClobbers(false), + isVRegCycle(false), isCall(false), isTwoAddress(false), + isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false), isAvailable(false), isScheduled(false), isScheduleHigh(false), isCloned(false), SchedulingPref(Sched::None), @@ -292,8 +293,8 @@ namespace llvm { : Node(0), Instr(instr), OrigNode(0), NodeNum(nodenum), NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0), NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0), - isCall(false), isTwoAddress(false), isCommutable(false), - hasPhysRegDefs(false), hasPhysRegClobbers(false), + isVRegCycle(false), isCall(false), isTwoAddress(false), + isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false), isAvailable(false), isScheduled(false), isScheduleHigh(false), isCloned(false), SchedulingPref(Sched::None), @@ -305,8 +306,8 @@ namespace llvm { : Node(0), Instr(0), OrigNode(0), NodeNum(~0u), NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0), NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0), - isCall(false), isTwoAddress(false), isCommutable(false), - hasPhysRegDefs(false), hasPhysRegClobbers(false), + isVRegCycle(false), isCall(false), isTwoAddress(false), + isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false), isAvailable(false), isScheduled(false), isScheduleHigh(false), isCloned(false), SchedulingPref(Sched::None), diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index f5a5db89fa0c..b2e9c15b68bc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -77,6 +77,9 @@ static cl::opt DisableSchedRegPressure( static cl::opt DisableSchedLiveUses( "disable-sched-live-uses", cl::Hidden, cl::init(true), cl::desc("Disable live use priority in sched=list-ilp")); +static cl::opt DisableSchedVRegCycle( + "disable-sched-vrcycle", cl::Hidden, cl::init(false), + cl::desc("Disable virtual register cycle interference checks")); static cl::opt DisableSchedStalls( "disable-sched-stalls", cl::Hidden, cl::init(true), cl::desc("Disable no-stall priority in sched=list-ilp")); @@ -2045,6 +2048,44 @@ static bool UnitsSharePred(const SUnit *left, const SUnit *right) { return false; } +// Return true if the virtual register defined by VRCycleSU may interfere with +// VRUseSU. +// +// Note: We may consider two SU's that use the same value live into a loop as +// interferng even though the value is not an induction variable. This is an +// unfortunate consequence of scheduling on the selection DAG. +static bool checkVRegCycleInterference(const SUnit *VRCycleSU, + const SUnit *VRUseSU) { + for (SUnit::const_pred_iterator I = VRCycleSU->Preds.begin(), + E = VRCycleSU->Preds.end(); I != E; ++I) { + if (I->isCtrl()) continue; // ignore chain preds + SDNode *InNode = I->getSUnit()->getNode(); + if (!InNode || InNode->getOpcode() != ISD::CopyFromReg) + continue; + for (SUnit::const_pred_iterator II = VRUseSU->Preds.begin(), + EE = VRUseSU->Preds.end(); II != EE; ++II) { + if (II->getSUnit() == I->getSUnit()) + return true; + } + } + return false; +} + +// Compare the VRegCycle properties of the nodes. +// Return -1 if left has higher priority, 1 if right has higher priority. +// Return 0 if priority is equivalent. +static int BUCompareVRegCycle(const SUnit *left, const SUnit *right) { + if (left->isVRegCycle && !right->isVRegCycle) { + if (checkVRegCycleInterference(left, right)) + return -1; + } + else if (!left->isVRegCycle && right->isVRegCycle) { + if (checkVRegCycleInterference(right, left)) + return 1; + } + return 0; +} + // Check for either a dependence (latency) or resource (hazard) stall. // // Note: The ScheduleHazardRecognizer interface requires a non-const SU. @@ -2232,11 +2273,15 @@ bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { << left->NodeNum << ")\n"); return false; } - else if (!LHigh && !RHigh) { - int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ); - if (result != 0) - return result > 0; + int result = 0; + if (!DisableSchedVRegCycle) { + result = BUCompareVRegCycle(left, right); } + if (result == 0 && !LHigh && !RHigh) { + result = BUCompareLatency(left, right, true /*checkPref*/, SPQ); + } + if (result != 0) + return result > 0; return BURRSort(left, right, SPQ); } @@ -2302,6 +2347,12 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { if (RReduce && !LReduce) return true; } + if (!DisableSchedVRegCycle) { + int result = BUCompareVRegCycle(left, right); + if (result != 0) + return result > 0; + } + if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) { DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n"); diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 202f7ab4e14f..24a1937c445e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -81,6 +81,7 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { SUnit *SU = NewSUnit(Old->getNode()); SU->OrigNode = Old->OrigNode; SU->Latency = Old->Latency; + SU->isVRegCycle = Old->isVRegCycle; SU->isCall = Old->isCall; SU->isTwoAddress = Old->isTwoAddress; SU->isCommutable = Old->isCommutable; @@ -341,6 +342,10 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NodeSUnit->NodeNum); + // Set isVRegCycle if the node operands are live into and value is live out + // of a single block loop. + InitVRegCycleFlag(NodeSUnit); + // Compute NumRegDefsLeft. This must be done before AddSchedEdges. InitNumRegDefsLeft(NodeSUnit); @@ -507,6 +512,47 @@ void ScheduleDAGSDNodes::RegDefIter::Advance() { } } +// Set isVRegCycle if this node's single use is CopyToReg and its only active +// data operands are CopyFromReg. +// +// This is only relevant for single-block loops, in which case the VRegCycle +// node is likely an induction variable in which the operand and target virtual +// registers should be coalesced (e.g. pre/post increment values). Setting the +// isVRegCycle flag helps the scheduler prioritize other uses of the same +// CopyFromReg so that this node becomes the virtual register "kill". This +// avoids interference between the values live in and out of the block and +// eliminates a copy inside the loop. +void ScheduleDAGSDNodes::InitVRegCycleFlag(SUnit *SU) { + if (!BB->isSuccessor(BB)) + return; + + SDNode *N = SU->getNode(); + if (N->getGluedNode()) + return; + + if (!N->hasOneUse() || N->use_begin()->getOpcode() != ISD::CopyToReg) + return; + + bool FoundLiveIn = false; + for (SDNode::op_iterator OI = N->op_begin(), E = N->op_end(); OI != E; ++OI) { + EVT OpVT = OI->getValueType(); + assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!"); + + if (OpVT == MVT::Other) + continue; // ignore chain operands + + if (isPassiveNode(OI->getNode())) + continue; // ignore constants and such + + if (OI->getNode()->getOpcode() != ISD::CopyFromReg) + return; + + FoundLiveIn = true; + } + if (FoundLiveIn) + SU->isVRegCycle = true; +} + void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) { assert(SU->NumRegDefsLeft == 0 && "expect a new node"); for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) { diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index cc7310e4ca42..b5f68f3055cf 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -80,6 +80,12 @@ namespace llvm { /// flagged together nodes with a single SUnit. virtual void BuildSchedGraph(AliasAnalysis *AA); + /// InitVRegCycleFlag - Set isVRegCycle if this node's single use is + /// CopyToReg and its only active data operands are CopyFromReg within a + /// single block loop. + /// + void InitVRegCycleFlag(SUnit *SU); + /// InitNumRegDefsLeft - Determine the # of regs defined by this node. /// void InitNumRegDefsLeft(SUnit *SU); diff --git a/llvm/test/CodeGen/ARM/2011-04-07-schediv.ll b/llvm/test/CodeGen/ARM/2011-04-07-schediv.ll new file mode 100644 index 000000000000..a61908fd7c45 --- /dev/null +++ b/llvm/test/CodeGen/ARM/2011-04-07-schediv.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -mcpu=cortex-a8 | FileCheck %s +; Tests preRAsched support for VRegCycle interference. + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10" + +define void @t(i32 %src_width, float* nocapture %src_copy_start, float* nocapture %dst_copy_start, i32 %src_copy_start_index) nounwind optsize { +entry: + %src_copy_start6 = bitcast float* %src_copy_start to i8* + %0 = icmp eq i32 %src_width, 0 + br i1 %0, label %return, label %bb + +; Make sure the scheduler schedules all uses of the preincrement +; induction variable before defining the postincrement value. +; CHECK: t: +; CHECK-NOT: mov +bb: ; preds = %entry, %bb + %j.05 = phi i32 [ %2, %bb ], [ 0, %entry ] + %tmp = mul i32 %j.05, %src_copy_start_index + %uglygep = getelementptr i8* %src_copy_start6, i32 %tmp + %src_copy_start_addr.04 = bitcast i8* %uglygep to float* + %dst_copy_start_addr.03 = getelementptr float* %dst_copy_start, i32 %j.05 + %1 = load float* %src_copy_start_addr.04, align 4 + store float %1, float* %dst_copy_start_addr.03, align 4 + %2 = add i32 %j.05, 1 + %exitcond = icmp eq i32 %2, %src_width + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +}