[Hexagon] Avoid introducing calls into coalesced range of HVX vector pairs
If an HVX vector register is to be coalesced into a vector pair, make sure that the vector pair will not have a function call in its live range unless it already had one. All HVX vector registers are volatile, so any vector register that is live across a function call has to be spilled. If a vector that needs to be spilled has been coalesced into a vector pair, then the whole pair has to be spilled (even if only part of it is live), taking extra stack space. llvm-svn: 337073
This commit is contained in:
parent
9e25d5d2ce
commit
7ced04c0fd
|
@ -19,6 +19,7 @@
|
|||
#include "HexagonTargetMachine.h"
|
||||
#include "llvm/ADT/BitVector.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/CodeGen/LiveIntervals.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
|
@ -243,6 +244,55 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
|||
}
|
||||
|
||||
|
||||
/// Decide whether the coalescer may merge the source and destination of the
/// copy-like instruction \p MI into a register of class \p NewRC.
///
/// The only case that is ever rejected is coalescing a single HVX vector
/// (HvxVR) into an HVX vector pair (HvxWR) when doing so would make the pair
/// live across a function call that it was not already live across. HVX
/// vector registers are volatile, so such a pair would have to be spilled as
/// a whole, even if only one half of it is actually live.
///
/// \param MI        Instruction being coalesced; operand 0 is read as the
///                  destination register and operand 1 as the source.
/// \param SrcRC     Register class of the source register.
/// \param SubReg    Not consulted by this implementation.
/// \param DstRC     Register class of the destination register.
/// \param DstSubReg Not consulted by this implementation.
/// \param NewRC     Register class of the coalesced register.
/// \param LIS       Live interval information for the function.
/// \returns true if coalescing is allowed, false if it should be avoided.
bool HexagonRegisterInfo::shouldCoalesce(MachineInstr *MI,
      const TargetRegisterClass *SrcRC, unsigned SubReg,
      const TargetRegisterClass *DstRC, unsigned DstSubReg,
      const TargetRegisterClass *NewRC, LiveIntervals &LIS) const {
  MachineFunction &MF = *MI->getParent()->getParent();
  const HexagonSubtarget &HST = MF.getSubtarget<HexagonSubtarget>();

  // Only coalescing that produces an HVX vector pair is of interest here.
  const bool CoalescesToPair =
      HST.useHVXOps() && NewRC->getID() == Hexagon::HvxWRRegClass.getID();
  if (!CoalescesToPair)
    return true;

  // If neither side is a single vector, both are already pairs and nothing
  // gets widened by the coalescing.
  const unsigned SingleVecID = Hexagon::HvxVRRegClass.getID();
  const bool SrcIsSingle = SrcRC->getID() == SingleVecID;
  const bool DstIsSingle = DstRC->getID() == SingleVecID;
  if (!(SrcIsSingle || DstIsSingle))
    return true;

  const unsigned DstReg = MI->getOperand(0).getReg();
  const unsigned SrcReg = MI->getOperand(1).getReg();
  const SlotIndexes &Slots = *LIS.getSlotIndexes();

  // Walk the base slot indexes of a live segment, from the base index of its
  // start up to (but not including) the base index of its end, looking for
  // a call instruction.
  auto SegmentHasCall = [&Slots] (const LiveInterval::Segment &Seg) {
    const SlotIndex Stop = Seg.end.getBaseIndex();
    SlotIndex Cur = Seg.start.getBaseIndex();
    while (Cur != Stop) {
      const MachineInstr *Inst = Slots.getInstructionFromIndex(Cur);
      if (Inst && Inst->isCall())
        return true;
      Cur = Cur.getNextIndex();
    }
    return false;
  };

  // True if any segment of the register's live interval contains a call.
  auto LiveAcrossCall = [&LIS, &SegmentHasCall] (unsigned Reg) {
    return any_of(LIS.getInterval(Reg), SegmentHasCall);
  };

  if (SrcIsSingle && DstIsSingle) {
    // Two single vectors are being merged into a wider (HvxWR) register.
    // Reject the merge if either of them is live across a call, since the
    // resulting pair would be as well.
    if (LiveAcrossCall(DstReg))
      return false;
    return !LiveAcrossCall(SrcReg);
  }

  // Exactly one side is a single vector and the other is a pair. This is
  // fine when the pair already spans a call (nothing gets worse), or when
  // the single vector does not span one (nothing is introduced).
  const unsigned PairReg = SrcIsSingle ? DstReg : SrcReg;
  const unsigned SingleReg = SrcIsSingle ? SrcReg : DstReg;
  if (LiveAcrossCall(PairReg))
    return true;
  return !LiveAcrossCall(SingleReg);
}
|
||||
|
||||
|
||||
unsigned HexagonRegisterInfo::getRARegister() const {
|
||||
return Hexagon::R31;
|
||||
}
|
||||
|
|
|
@ -63,6 +63,10 @@ public:
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Override of the coalescing hook: returns true if the source and
/// destination registers of \p MI (of classes \p SrcRC and \p DstRC) may be
/// coalesced into a register of class \p NewRC. The Hexagon implementation
/// uses \p LIS to avoid creating HVX vector pairs whose live range would
/// newly include a function call.
bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC,
|
||||
unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg,
|
||||
const TargetRegisterClass *NewRC, LiveIntervals &LIS) const override;
|
||||
|
||||
// Debug information queries.
|
||||
unsigned getRARegister() const;
|
||||
unsigned getFrameRegister(const MachineFunction &MF) const override;
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
; RUN: llc -march=hexagon < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
|
||||
target triple = "hexagon"
|
||||
|
||||
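; Check that this code only spills a single vector.
; NOTE(review): the pattern below spells the base register as "#r29+"; if the
; printed form is "vmem(r29+#N)" the negative match can never fire. Confirm
; against the actual llc output before relying on this test.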
|
||||
; CHECK-NOT: vmem(#r29+{{[^0]}})
|
||||
|
||||
%struct.descr = type opaque
|
||||
|
||||
; Two calls each return a single <32 x i32> vector, and the results are then
; combined into one <64 x i32> value. %call is produced before the call to
; @kirby and is only consumed by the vcombine afterwards, so it has to stay
; live across that second call.
define inreg <64 x i32> @danny(%struct.descr* %desc, i32 %xy0, i32 %xy1) #0 {
|
||||
entry:
|
||||
%call = tail call inreg <32 x i32> @sammy(%struct.descr* %desc, i32 %xy0) #3
|
||||
; %call is still needed after this call returns.
%call1 = tail call inreg <32 x i32> @kirby(%struct.descr* %desc, i32 %xy1) #3
|
||||
; Both single vectors feed the pair-producing intrinsic.
%0 = tail call <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32> %call1, <32 x i32> %call)
|
||||
ret <64 x i32> %0
|
||||
}
|
||||
|
||||
declare inreg <32 x i32> @sammy(%struct.descr*, i32) #1
|
||||
declare inreg <32 x i32> @kirby(%struct.descr*, i32) #1
|
||||
declare <64 x i32> @llvm.hexagon.V6.vcombine.128B(<32 x i32>, <32 x i32>) #2
|
||||
|
||||
attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx-length128b,+hvxv60" }
|
||||
attributes #1 = { "target-cpu"="hexagonv60" "target-features"="+hvx-length128b,+hvxv60" }
|
||||
attributes #2 = { nounwind readnone }
|
||||
attributes #3 = { nounwind }
|
Loading…
Reference in New Issue