- Two-address pass should not assume unfolding is always successful.

- X86 unfolding should check if the instructions being unfolded has memoperands.
  If there is no memoperands, then it must assume conservative alignment. If this
  would introduce an expensive sse unaligned load / store, then unfoldMemoryOperand
  etc. should not unfold the instruction.

llvm-svn: 107509
This commit is contained in:
Evan Cheng 2010-07-02 20:36:18 +00:00
parent c653d4c574
commit 0ce84486c3
3 changed files with 126 additions and 12 deletions

View File

@ -926,14 +926,12 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
UnfoldTID.OpInfo[LoadRegIndex].getRegClass(TRI);
unsigned Reg = MRI->createVirtualRegister(RC);
SmallVector<MachineInstr *, 2> NewMIs;
bool Success =
TII->unfoldMemoryOperand(MF, mi, Reg,
/*UnfoldLoad=*/true, /*UnfoldStore=*/false,
NewMIs);
(void)Success;
assert(Success &&
"unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
"succeeded!");
if (!TII->unfoldMemoryOperand(MF, mi, Reg,
/*UnfoldLoad=*/true,/*UnfoldStore=*/false,
NewMIs)) {
DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
return false;
}
assert(NewMIs.size() == 2 &&
"Unfolded a load into multiple instructions!");
// The load was previously folded, so this is the only use.

View File

@ -2159,7 +2159,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
MachineInstr::mmo_iterator MMOBegin,
MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
bool isAligned = (*MMOBegin)->getAlignment() >= 16;
bool isAligned = *MMOBegin && (*MMOBegin)->getAlignment() >= 16;
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
DebugLoc DL;
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
@ -2189,7 +2189,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
MachineInstr::mmo_iterator MMOBegin,
MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
bool isAligned = (*MMOBegin)->getAlignment() >= 16;
bool isAligned = *MMOBegin && (*MMOBegin)->getAlignment() >= 16;
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
DebugLoc DL;
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
@ -2693,6 +2693,13 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
const TargetInstrDesc &TID = get(Opc);
const TargetOperandInfo &TOI = TID.OpInfo[Index];
const TargetRegisterClass *RC = TOI.getRegClass(&RI);
if (!MI->hasOneMemOperand() &&
RC == &X86::VR128RegClass &&
!TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
// Without memoperands, loadRegFromAddr and storeRegToStackSlot will
// conservatively assume the address is unaligned. That's bad for
// performance.
return false;
SmallVector<MachineOperand, X86AddrNumOperands> AddrOps;
SmallVector<MachineOperand,2> BeforeOps;
SmallVector<MachineOperand,2> AfterOps;
@ -2834,7 +2841,12 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
MachineInstr::mmo_iterator> MMOs =
MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
cast<MachineSDNode>(N)->memoperands_end());
bool isAligned = (*MMOs.first)->getAlignment() >= 16;
if (!(*MMOs.first) &&
RC == &X86::VR128RegClass &&
!TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
// Do not introduce a slow unaligned load.
return false;
bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16;
Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
VT, MVT::Other, &AddrOps[0], AddrOps.size());
NewNodes.push_back(Load);
@ -2871,7 +2883,12 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
MachineInstr::mmo_iterator> MMOs =
MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
cast<MachineSDNode>(N)->memoperands_end());
bool isAligned = (*MMOs.first)->getAlignment() >= 16;
if (!(*MMOs.first) &&
RC == &X86::VR128RegClass &&
!TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
// Do not introduce a slow unaligned store.
return false;
bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16;
SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
isAligned, TM),
dl, MVT::Other,

View File

@ -0,0 +1,99 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin
; rdar://8154265
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
define void @_ZN2CA3OGL20fill_surface_mesh_3dERNS0_7ContextEPKNS_6Render13MeshTransformEPKNS0_5LayerEPNS0_7SurfaceEfNS0_13TextureFilterESC_f() nounwind optsize ssp {
entry:
br i1 undef, label %bb2.thread, label %bb2
bb2.thread: ; preds = %entry
br i1 undef, label %bb41, label %bb10.preheader
bb2: ; preds = %entry
unreachable
bb10.preheader: ; preds = %bb2.thread
br i1 undef, label %bb9, label %bb12
bb9: ; preds = %bb9, %bb10.preheader
br i1 undef, label %bb9, label %bb12
bb12: ; preds = %bb9, %bb10.preheader
br i1 undef, label %bb4.i.i, label %bb3.i.i
bb3.i.i: ; preds = %bb12
unreachable
bb4.i.i: ; preds = %bb12
br i1 undef, label %bb8.i.i, label %_ZN2CA3OGL12_GLOBAL__N_16LightsC1ERNS0_7ContextEPKNS0_5LayerEPKNS_6Render13MeshTransformERKNS_4Vec3IfEESF_.exit
bb8.i.i: ; preds = %bb4.i.i
br i1 undef, label %_ZN2CA3OGL12_GLOBAL__N_16LightsC1ERNS0_7ContextEPKNS0_5LayerEPKNS_6Render13MeshTransformERKNS_4Vec3IfEESF_.exit, label %bb9.i.i
bb9.i.i: ; preds = %bb8.i.i
br i1 undef, label %bb11.i.i, label %bb10.i.i
bb10.i.i: ; preds = %bb9.i.i
unreachable
bb11.i.i: ; preds = %bb9.i.i
unreachable
_ZN2CA3OGL12_GLOBAL__N_16LightsC1ERNS0_7ContextEPKNS0_5LayerEPKNS_6Render13MeshTransformERKNS_4Vec3IfEESF_.exit: ; preds = %bb8.i.i, %bb4.i.i
br i1 undef, label %bb19, label %bb14
bb14: ; preds = %_ZN2CA3OGL12_GLOBAL__N_16LightsC1ERNS0_7ContextEPKNS0_5LayerEPKNS_6Render13MeshTransformERKNS_4Vec3IfEESF_.exit
unreachable
bb19: ; preds = %_ZN2CA3OGL12_GLOBAL__N_16LightsC1ERNS0_7ContextEPKNS0_5LayerEPKNS_6Render13MeshTransformERKNS_4Vec3IfEESF_.exit
br i1 undef, label %bb.i50, label %bb6.i
bb.i50: ; preds = %bb19
unreachable
bb6.i: ; preds = %bb19
br i1 undef, label %bb28, label %bb.nph106
bb22: ; preds = %bb24.preheader
br i1 undef, label %bb2.i.i, label %bb.i.i49
bb.i.i49: ; preds = %bb22
%0 = load float* undef, align 4 ; <float> [#uses=1]
%1 = insertelement <4 x float> undef, float %0, i32 0 ; <<4 x float>> [#uses=1]
%2 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x float> %1) nounwind readnone ; <<4 x float>> [#uses=1]
%3 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %2, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>) nounwind readnone ; <<4 x float>> [#uses=1]
%4 = extractelement <4 x float> %3, i32 0 ; <float> [#uses=1]
store float %4, float* undef, align 4
%5 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x float> undef) nounwind readnone ; <<4 x float>> [#uses=1]
%6 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %5, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>) nounwind readnone ; <<4 x float>> [#uses=1]
%7 = extractelement <4 x float> %6, i32 0 ; <float> [#uses=1]
store float %7, float* undef, align 4
unreachable
bb2.i.i: ; preds = %bb22
unreachable
bb26.loopexit: ; preds = %bb24.preheader
br i1 undef, label %bb28, label %bb24.preheader
bb.nph106: ; preds = %bb6.i
br label %bb24.preheader
bb24.preheader: ; preds = %bb.nph106, %bb26.loopexit
br i1 undef, label %bb22, label %bb26.loopexit
bb28: ; preds = %bb26.loopexit, %bb6.i
unreachable
bb41: ; preds = %bb2.thread
br i1 undef, label %return, label %bb46
bb46: ; preds = %bb41
ret void
return: ; preds = %bb41
ret void
}