Revert "[X86] Use push-pop for materializing small constants under 'minsize'"

The red zone consists of 128 bytes beyond the stack pointer so that the
allocation of objects in leaf functions doesn't require decrementing
rsp.  In r255656, we introduced an optimization that would cheaply
materialize certain constants via push/pop.  Push decrements the stack
pointer and stores it's result at what is now the top of the stack.
However, this means that using push/pop would encroach on the red zone.
PR26023 gives an example where this corrupts an object in the red zone.

llvm-svn: 256808
This commit is contained in:
David Majnemer 2016-01-05 02:32:06 +00:00
parent 2c82ee60c3
commit 869be0a4a6
8 changed files with 106 additions and 262 deletions

View File

@ -157,13 +157,9 @@ namespace {
/// performance.
bool OptForSize;
/// If true, selector should try to optimize for minimum code size.
bool OptForMinSize;
public:
explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
: SelectionDAGISel(tm, OptLevel), OptForSize(false),
OptForMinSize(false) {}
: SelectionDAGISel(tm, OptLevel), OptForSize(false) {}
const char *getPassName() const override {
return "X86 DAG->DAG Instruction Selection";
@ -535,10 +531,8 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
}
void X86DAGToDAGISel::PreprocessISelDAG() {
// OptFor[Min]Size are used in pattern predicates that isel is matching.
// OptForSize is used in pattern predicates that isel is matching.
OptForSize = MF->getFunction()->optForSize();
OptForMinSize = MF->getFunction()->optForMinSize();
assert((!OptForMinSize || OptForSize) && "OptForMinSize implies OptForSize");
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
E = CurDAG->allnodes_end(); I != E; ) {

View File

@ -250,7 +250,7 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins),
// Alias instruction mapping movr0 to xor.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
isPseudo = 1, AddedComplexity = 20 in
isPseudo = 1 in
def MOV32r0 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
[(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>;
@ -263,7 +263,7 @@ def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)> {
}
let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode],
AddedComplexity = 15 in {
AddedComplexity = 1 in {
// Pseudo instructions for materializing 1 and -1 using XOR+INC/DEC,
// which only require 3 bytes compared to MOV32ri which requires 5.
let Defs = [EFLAGS], isReMaterializable = 1, isPseudo = 1 in {
@ -278,17 +278,6 @@ let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode],
def : Pat<(i16 -1), (EXTRACT_SUBREG (MOV32r_1), sub_16bit)>;
}
let isReMaterializable = 1, isPseudo = 1, AddedComplexity = 10 in {
// AddedComplexity higher than MOV64ri but lower than MOV32r0 and MOV32r1.
// FIXME: Add itinerary class and Schedule.
def MOV32ImmSExti8 : I<0, Pseudo, (outs GR32:$dst), (ins i32i8imm:$src), "",
[(set GR32:$dst, i32immSExt8:$src)]>,
Requires<[OptForMinSize]>;
def MOV64ImmSExti8 : I<0, Pseudo, (outs GR64:$dst), (ins i64i8imm:$src), "",
[(set GR64:$dst, i64immSExt8:$src)]>,
Requires<[OptForMinSize, NotWin64WithoutFP]>;
}
// Materialize i64 constant where top 32-bits are zero. This could theoretically
// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
// that would make it more difficult to rematerialize.

View File

@ -23,7 +23,6 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/DerivedTypes.h"
@ -5314,50 +5313,6 @@ static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII,
return true;
}
bool X86InstrInfo::ExpandMOVImmSExti8(MachineInstrBuilder &MIB) const {
MachineBasicBlock &MBB = *MIB->getParent();
DebugLoc DL = MIB->getDebugLoc();
int64_t Imm = MIB->getOperand(1).getImm();
assert(Imm != 0 && "Using push/pop for 0 is not efficient.");
MachineBasicBlock::iterator I = MIB.getInstr();
int StackAdjustment;
if (Subtarget.is64Bit()) {
assert(MIB->getOpcode() == X86::MOV64ImmSExti8 ||
MIB->getOpcode() == X86::MOV32ImmSExti8);
// 64-bit mode doesn't have 32-bit push/pop, so use 64-bit operations and
// widen the register if necessary.
StackAdjustment = 8;
BuildMI(MBB, I, DL, get(X86::PUSH64i8)).addImm(Imm);
MIB->setDesc(get(X86::POP64r));
MIB->getOperand(0)
.setReg(getX86SubSuperRegister(MIB->getOperand(0).getReg(), 64));
} else {
assert(MIB->getOpcode() == X86::MOV32ImmSExti8);
StackAdjustment = 4;
BuildMI(MBB, I, DL, get(X86::PUSH32i8)).addImm(Imm);
MIB->setDesc(get(X86::POP32r));
}
// Build CFI if necessary.
MachineFunction &MF = *MBB.getParent();
const X86FrameLowering *TFL = Subtarget.getFrameLowering();
bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
bool NeedsDwarfCFI =
!IsWin64Prologue &&
(MF.getMMI().hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry());
bool EmitCFI = !TFL->hasFP(MF) && NeedsDwarfCFI;
if (EmitCFI) {
TFL->BuildCFI(MBB, I, DL,
MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment));
TFL->BuildCFI(MBB, std::next(I), DL,
MCCFIInstruction::createAdjustCfaOffset(nullptr, -StackAdjustment));
}
return true;
}
// LoadStackGuard has so far only been implemented for 64-bit MachO. Different
// code sequence is needed for other targets.
static void expandLoadStackGuard(MachineInstrBuilder &MIB,
@ -5390,9 +5345,6 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
return expandMOV32r1(MIB, *this, /*MinusOne=*/ false);
case X86::MOV32r_1:
return expandMOV32r1(MIB, *this, /*MinusOne=*/ true);
case X86::MOV32ImmSExti8:
case X86::MOV64ImmSExti8:
return ExpandMOVImmSExti8(MIB);
case X86::SETB_C8r:
return Expand2AddrUndef(MIB, get(X86::SBB8rr));
case X86::SETB_C16r:

View File

@ -23,7 +23,6 @@
#include "X86GenInstrInfo.inc"
namespace llvm {
class MachineInstrBuilder;
class X86RegisterInfo;
class X86Subtarget;
@ -565,9 +564,6 @@ private:
/// operand and follow operands form a reference to the stack frame.
bool isFrameOperand(const MachineInstr *MI, unsigned int Op,
int &FrameIndex) const;
/// Expand the MOVImmSExti8 pseudo-instructions.
bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB) const;
};
} // End llvm namespace

View File

@ -822,8 +822,6 @@ def In32BitMode : Predicate<"Subtarget->is32Bit()">,
AssemblerPredicate<"Mode32Bit", "32-bit mode">;
def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;
def NotWin64WithoutFP : Predicate<"!Subtarget->isTargetWin64() ||"
"Subtarget->getFrameLowering()->hasFP(*MF)">;
def IsPS4 : Predicate<"Subtarget->isTargetPS4()">;
def NotPS4 : Predicate<"!Subtarget->isTargetPS4()">;
def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
@ -837,7 +835,6 @@ def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small ||"
def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
def IsNotPIC : Predicate<"TM.getRelocationModel() != Reloc::PIC_">;
def OptForSize : Predicate<"OptForSize">;
def OptForMinSize : Predicate<"OptForMinSize">;
def OptForSpeed : Predicate<"!OptForSize">;
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;

View File

@ -0,0 +1,100 @@
; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECK32
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECK64
define i32 @one32() optsize {
entry:
ret i32 1
; CHECK32-LABEL: one32
; CHECK32: xorl %eax, %eax
; CHECK32-NEXT: incl %eax
; CHECK32-NEXT: ret
; FIXME: Figure out the best approach in 64-bit mode.
; CHECK64-LABEL: one32
; CHECK64: movl $1, %eax
; CHECK64-NEXT: retq
}
define i32 @minus_one32() optsize {
entry:
ret i32 -1
; CHECK32-LABEL: minus_one32
; CHECK32: xorl %eax, %eax
; CHECK32-NEXT: decl %eax
; CHECK32-NEXT: ret
}
define i16 @one16() optsize {
entry:
ret i16 1
; CHECK32-LABEL: one16
; CHECK32: xorl %eax, %eax
; CHECK32-NEXT: incl %eax
; CHECK32-NEXT: retl
}
define i16 @minus_one16() optsize {
entry:
ret i16 -1
; CHECK32-LABEL: minus_one16
; CHECK32: xorl %eax, %eax
; CHECK32-NEXT: decl %eax
; CHECK32-NEXT: retl
}
define i32 @test_rematerialization() optsize {
entry:
; Materialize -1 (thiscall forces it into %ecx).
tail call x86_thiscallcc void @f(i32 -1)
; Clobber all registers except %esp, leaving nowhere to store the -1 besides
; spilling it to the stack.
tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
; -1 should be re-materialized here instead of getting spilled above.
ret i32 -1
; CHECK32-LABEL: test_rematerialization
; CHECK32: xorl %ecx, %ecx
; CHECK32-NEXT: decl %ecx
; CHECK32: calll
; CHECK32: xorl %eax, %eax
; CHECK32-NEXT: decl %eax
; CHECK32-NOT: %eax
; CHECK32: retl
}
define i32 @test_rematerialization2(i32 %x) optsize {
entry:
; Materialize -1 (thiscall forces it into %ecx).
tail call x86_thiscallcc void @f(i32 -1)
; Clobber all registers except %esp, leaving nowhere to store the -1 besides
; spilling it to the stack.
tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
; Define eflags.
%a = icmp ne i32 %x, 123
%b = zext i1 %a to i32
; Cause -1 to be rematerialized right in front of the cmov, which needs eflags.
; It must therefore not use the xor-dec lowering.
%c = select i1 %a, i32 %b, i32 -1
ret i32 %c
; CHECK32-LABEL: test_rematerialization2
; CHECK32: xorl %ecx, %ecx
; CHECK32-NEXT: decl %ecx
; CHECK32: calll
; CHECK32: cmpl
; CHECK32: setne
; CHECK32-NOT: xorl
; CHECK32: movl $-1
; CHECK32: cmov
; CHECK32: retl
}
declare x86_thiscallcc void @f(i32)

View File

@ -1,184 +0,0 @@
; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECK32
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECK64
; RUN: llc -mtriple=x86_64-pc-win32 -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECKWIN64
define i32 @one32_nooptsize() {
entry:
ret i32 1
; When not optimizing for size, use mov.
; CHECK32-LABEL: one32_nooptsize:
; CHECK32: movl $1, %eax
; CHECK32-NEXT: retl
; CHECK64-LABEL: one32_nooptsize:
; CHECK64: movl $1, %eax
; CHECK64-NEXT: retq
}
define i32 @one32() optsize {
entry:
ret i32 1
; CHECK32-LABEL: one32:
; CHECK32: xorl %eax, %eax
; CHECK32-NEXT: incl %eax
; CHECK32-NEXT: retl
; FIXME: Figure out the best approach in 64-bit mode.
; CHECK64-LABEL: one32:
; CHECK64: movl $1, %eax
; CHECK64-NEXT: retq
}
define i32 @one32_minsize() minsize {
entry:
ret i32 1
; On 32-bit, xor-inc is preferred over push-pop.
; CHECK32-LABEL: one32_minsize:
; CHECK32: xorl %eax, %eax
; CHECK32-NEXT: incl %eax
; CHECK32-NEXT: retl
; On 64-bit we don't do xor-inc yet, so push-pop it is. Note that we have to
; pop into a 64-bit register even when we just need 32 bits.
; CHECK64-LABEL: one32_minsize:
; CHECK64: pushq $1
; CHECK64: .cfi_adjust_cfa_offset 8
; CHECK64: popq %rax
; CHECK64: .cfi_adjust_cfa_offset -8
; CHECK64-NEXT: retq
}
define i64 @one64_minsize() minsize {
entry:
ret i64 1
; On 64-bit we don't do xor-inc yet, so push-pop it is.
; CHECK64-LABEL: one64_minsize:
; CHECK64: pushq $1
; CHECK64: .cfi_adjust_cfa_offset 8
; CHECK64: popq %rax
; CHECK64: .cfi_adjust_cfa_offset -8
; CHECK64-NEXT: retq
; On Win64 we can't adjust the stack unless there's a frame pointer.
; CHECKWIN64-LABEL: one64_minsize:
; CHECKWIN64: movl $1, %eax
; CHECKWIN64-NEXT: retq
}
define i32 @minus_one32() optsize {
entry:
ret i32 -1
; CHECK32-LABEL: minus_one32:
; CHECK32: xorl %eax, %eax
; CHECK32-NEXT: decl %eax
; CHECK32-NEXT: retl
}
define i32 @minus_one32_minsize() minsize {
entry:
ret i32 -1
; xor-dec is preferred over push-pop.
; CHECK32-LABEL: minus_one32_minsize:
; CHECK32: xorl %eax, %eax
; CHECK32-NEXT: decl %eax
; CHECK32-NEXT: retl
}
define i16 @one16() optsize {
entry:
ret i16 1
; CHECK32-LABEL: one16:
; CHECK32: xorl %eax, %eax
; CHECK32-NEXT: incl %eax
; CHECK32-NEXT: retl
}
define i16 @minus_one16() optsize {
entry:
ret i16 -1
; CHECK32-LABEL: minus_one16:
; CHECK32: xorl %eax, %eax
; CHECK32-NEXT: decl %eax
; CHECK32-NEXT: retl
}
define i32 @minus_five32() minsize {
entry:
ret i32 -5
; CHECK32-LABEL: minus_five32:
; CHECK32: pushl $-5
; CHECK32: popl %eax
; CHECK32: retl
}
define i64 @minus_five64() minsize {
entry:
ret i64 -5
; CHECK64-LABEL: minus_five64:
; CHECK64: pushq $-5
; CHECK64: .cfi_adjust_cfa_offset 8
; CHECK64: popq %rax
; CHECK64: .cfi_adjust_cfa_offset -8
; CHECK64: retq
}
define i32 @rematerialize_minus_one() optsize {
entry:
; Materialize -1 (thiscall forces it into %ecx).
tail call x86_thiscallcc void @f(i32 -1)
; Clobber all registers except %esp, leaving nowhere to store the -1 besides
; spilling it to the stack.
tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
; -1 should be re-materialized here instead of getting spilled above.
ret i32 -1
; CHECK32-LABEL: rematerialize_minus_one
; CHECK32: xorl %ecx, %ecx
; CHECK32-NEXT: decl %ecx
; CHECK32: calll
; CHECK32: xorl %eax, %eax
; CHECK32-NEXT: decl %eax
; CHECK32-NOT: %eax
; CHECK32: retl
}
define i32 @rematerialize_minus_one_eflags(i32 %x) optsize {
entry:
; Materialize -1 (thiscall forces it into %ecx).
tail call x86_thiscallcc void @f(i32 -1)
; Clobber all registers except %esp, leaving nowhere to store the -1 besides
; spilling it to the stack.
tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
; Define eflags.
%a = icmp ne i32 %x, 123
%b = zext i1 %a to i32
; Cause -1 to be rematerialized right in front of the cmov, which needs eflags.
; It must therefore not use the xor-dec lowering.
%c = select i1 %a, i32 %b, i32 -1
ret i32 %c
; CHECK32-LABEL: rematerialize_minus_one_eflags
; CHECK32: xorl %ecx, %ecx
; CHECK32-NEXT: decl %ecx
; CHECK32: calll
; CHECK32: cmpl
; CHECK32: setne
; CHECK32-NOT: xorl
; CHECK32: movl $-1
; CHECK32: cmov
; CHECK32: retl
}
declare x86_thiscallcc void @f(i32)

View File

@ -29,9 +29,9 @@ define double @pow_wrapper_optsize(double %a) optsize {
define double @pow_wrapper_minsize(double %a) minsize {
; CHECK-LABEL: pow_wrapper_minsize:
; CHECK: # BB#0:
; CHECK-NEXT: movl $128, %edi
; CHECK-NEXT: movl $15, %edi
; CHECK-NEXT: jmp
%ret = tail call double @llvm.powi.f64(double %a, i32 128) nounwind ; <double> [#uses=1]
%ret = tail call double @llvm.powi.f64(double %a, i32 15) nounwind ; <double> [#uses=1]
ret double %ret
}