diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index f0331b83a1a9..812d2ecbd2d9 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -142,6 +142,8 @@ namespace { bool MatchAddress(SDOperand N, X86ISelAddressMode &AM, bool isRoot = true, unsigned Depth = 0); + bool MatchAddressBase(SDOperand N, X86ISelAddressMode &AM, + bool isRoot, unsigned Depth); bool SelectAddr(SDOperand Op, SDOperand N, SDOperand &Base, SDOperand &Scale, SDOperand &Index, SDOperand &Disp); bool SelectLEAAddr(SDOperand Op, SDOperand N, SDOperand &Base, @@ -572,12 +574,9 @@ void X86DAGToDAGISel::EmitFunctionEntryCode(Function &Fn, MachineFunction &MF) { /// addressing mode bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM, bool isRoot, unsigned Depth) { - if (Depth > 5) { - // Default, generate it as a register. - AM.BaseType = X86ISelAddressMode::RegBase; - AM.Base.Reg = N; - return false; - } + // Limit recursion. + if (Depth > 5) + return MatchAddressBase(N, AM, isRoot, Depth); // RIP relative addressing: %rip + 32-bit displacement! if (AM.isRIPRel) { @@ -763,6 +762,13 @@ bool X86DAGToDAGISel::MatchAddress(SDOperand N, X86ISelAddressMode &AM, break; } + return MatchAddressBase(N, AM, isRoot, Depth); +} + +/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the +/// specified addressing mode without any further recursion. +bool X86DAGToDAGISel::MatchAddressBase(SDOperand N, X86ISelAddressMode &AM, + bool isRoot, unsigned Depth) { // Is the base register already occupied? if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.Val) { // If so, check to see if the scale index register is set. 
diff --git a/llvm/test/CodeGen/X86/lea-recursion.ll b/llvm/test/CodeGen/X86/lea-recursion.ll new file mode 100644 index 000000000000..ca7b3674d27c --- /dev/null +++ b/llvm/test/CodeGen/X86/lea-recursion.ll @@ -0,0 +1,47 @@ +; RUN: llvm-as < %s | llc -march=x86-64 | grep lea | wc -l | grep 12 + +; This testcase was written to demonstrate an instruction-selection problem; +; however, it also happens to expose a limitation in the DAGCombiner's +; expression reassociation which causes it to miss opportunities for +; constant folding due to the intermediate adds having multiple uses. +; The Reassociate pass has similar limitations. If these limitations are +; fixed, the test commands above will need to be updated to expect fewer +; lea instructions. + +@g0 = weak global [1000 x i32] zeroinitializer, align 32 ; <[1000 x i32]*> [#uses=8] +@g1 = weak global [1000 x i32] zeroinitializer, align 32 ; <[1000 x i32]*> [#uses=7] + +define void @foo() { +entry: + %tmp4 = load i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 0) ; [#uses=1] + %tmp8 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 0) ; [#uses=1] + %tmp9 = add i32 %tmp4, 1 ; [#uses=1] + %tmp10 = add i32 %tmp9, %tmp8 ; [#uses=2] + store i32 %tmp10, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 1) + %tmp8.1 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 1) ; [#uses=1] + %tmp9.1 = add i32 %tmp10, 1 ; [#uses=1] + %tmp10.1 = add i32 %tmp9.1, %tmp8.1 ; [#uses=2] + store i32 %tmp10.1, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 2) + %tmp8.2 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 2) ; [#uses=1] + %tmp9.2 = add i32 %tmp10.1, 1 ; [#uses=1] + %tmp10.2 = add i32 %tmp9.2, %tmp8.2 ; [#uses=2] + store i32 %tmp10.2, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 3) + %tmp8.3 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 3) ; [#uses=1] + %tmp9.3 = add i32 %tmp10.2, 1 ; [#uses=1] + %tmp10.3 = add i32 %tmp9.3, %tmp8.3 ; [#uses=2] + store i32 %tmp10.3, i32* getelementptr ([1000 
x i32]* @g0, i32 0, i32 4) + %tmp8.4 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 4) ; [#uses=1] + %tmp9.4 = add i32 %tmp10.3, 1 ; [#uses=1] + %tmp10.4 = add i32 %tmp9.4, %tmp8.4 ; [#uses=2] + store i32 %tmp10.4, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 5) + %tmp8.5 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 5) ; [#uses=1] + %tmp9.5 = add i32 %tmp10.4, 1 ; [#uses=1] + %tmp10.5 = add i32 %tmp9.5, %tmp8.5 ; [#uses=2] + store i32 %tmp10.5, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 6) + %tmp8.6 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 6) ; [#uses=1] + %tmp9.6 = add i32 %tmp10.5, 1 ; [#uses=1] + %tmp10.6 = add i32 %tmp9.6, %tmp8.6 ; [#uses=1] + store i32 %tmp10.6, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 7) + ret void +} +