From bf6568b5d6698d160d2108376debef084ad0ccab Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Sat, 29 Oct 2011 21:23:04 +0000 Subject: [PATCH] Add a new DAGCombine optimization for BUILD_VECTOR. If all of the inputs are zero/any_extended, create a new simple BV which can be further optimized by other BV optimizations. llvm-svn: 143297 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 83 +++++++++++++++++++ .../X86/2009-06-07-ExpandMMXBitcast.ll | 2 +- llvm/test/CodeGen/X86/2011-10-27-tstore.ll | 16 ++++ llvm/test/CodeGen/X86/vec_shuffle-37.ll | 10 ++- 4 files changed, 106 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/X86/2011-10-27-tstore.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index b1afbf20c1be..d96ce75a83ed 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6936,7 +6936,90 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { unsigned NumInScalars = N->getNumOperands(); + DebugLoc dl = N->getDebugLoc(); EVT VT = N->getValueType(0); + // Check to see if this is a BUILD_VECTOR of a bunch of values + // which come from any_extend or zero_extend nodes. If so, we can create + // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR + // optimizations. + EVT SourceType = MVT::Other; + bool allExtend = true; + bool allAnyExt = true; + for (unsigned i = 0; i < NumInScalars; ++i) { + SDValue In = N->getOperand(i); + // Ignore undef inputs. + if (In.getOpcode() == ISD::UNDEF) continue; + + bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND; + bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND; + + // Abort non-extend incoming values. + if (!ZeroExt && !AnyExt) { + allExtend = false; + break; + } + + // The input is a ZeroExt or AnyExt. Check the original type. 
+ EVT InTy = In.getOperand(0).getValueType(); + + // Check that all of the widened source types are the same. + if (SourceType == MVT::Other) + SourceType = InTy; + else if (InTy != SourceType) { + // Multiple income types. Abort. + allExtend = false; + break; + } + + // Check if all of the extends are ANY_EXTENDs. + allAnyExt &= AnyExt; + } + + // And we are post type-legalization, + // If all of the values are Ext or undef, + // We have a non undef entry. + if (LegalTypes && allExtend && SourceType != MVT::Other) { + bool isLE = TLI.isLittleEndian(); + EVT InScalarTy = SourceType.getScalarType(); + EVT OutScalarTy = N->getValueType(0).getScalarType(); + unsigned ElemRatio = OutScalarTy.getSizeInBits()/InScalarTy.getSizeInBits(); + assert(ElemRatio > 1 && "Invalid element size ratio"); + SDValue Filler = allAnyExt ? DAG.getUNDEF(InScalarTy): + DAG.getConstant(0, InScalarTy); + + unsigned NewBVElems = ElemRatio * N->getValueType(0).getVectorNumElements(); + SmallVector<SDValue, 8> Ops(NewBVElems , Filler); + + // Populate the new build_vector + for (unsigned i=0; i < N->getNumOperands(); ++i) { + SDValue Cast = N->getOperand(i); + assert(Cast.getOpcode() == ISD::ANY_EXTEND || + Cast.getOpcode() == ISD::ZERO_EXTEND || + Cast.getOpcode() == ISD::UNDEF && "Invalid cast opcode"); + SDValue In; + if (Cast.getOpcode() == ISD::UNDEF) + In = DAG.getUNDEF(InScalarTy); + else + In = Cast->getOperand(0); + unsigned Index = isLE ? (i * ElemRatio) : + (i * ElemRatio + (ElemRatio - 1)); + + assert(Index < Ops.size() && "Invalid index"); + Ops[Index] = In; + } + + // The type of the new BUILD_VECTOR node. + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), InScalarTy, NewBVElems); + assert(VecVT.getSizeInBits() == N->getValueType(0).getSizeInBits() && + "Invalid vector size"); + + // Make the new BUILD_VECTOR. + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + VecVT, &Ops[0], Ops.size()); + + // Bitcast to the desired type. 
+ return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), BV); + } // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from diff --git a/llvm/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll b/llvm/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll index 025ab2e7c111..63a7da87550f 100644 --- a/llvm/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll +++ b/llvm/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-linux -mattr=+mmx | grep movd | count 3 +; RUN: llc < %s -mtriple=x86_64-linux -mattr=+mmx | grep movd | count 2 define i64 @a(i32 %a, i32 %b) nounwind readnone { entry: diff --git a/llvm/test/CodeGen/X86/2011-10-27-tstore.ll b/llvm/test/CodeGen/X86/2011-10-27-tstore.ll new file mode 100644 index 000000000000..016e02c3d5d7 --- /dev/null +++ b/llvm/test/CodeGen/X86/2011-10-27-tstore.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s + +target triple = "x86_64-unknown-linux-gnu" + +;CHECK: ltstore +;CHECK: pshufd +;CHECK: pshufd +;CHECK: ret +define void @ltstore() { +entry: + %in = load <4 x i32>* undef + %j = shufflevector <4 x i32> %in, <4 x i32> undef, <2 x i32> + store <2 x i32> %j, <2 x i32>* undef + ret void +} + diff --git a/llvm/test/CodeGen/X86/vec_shuffle-37.ll b/llvm/test/CodeGen/X86/vec_shuffle-37.ll index e91a7347cca0..060839893820 100644 --- a/llvm/test/CodeGen/X86/vec_shuffle-37.ll +++ b/llvm/test/CodeGen/X86/vec_shuffle-37.ll @@ -26,10 +26,12 @@ entry: define void @t02(<8 x i32>* %source, <2 x i32>* %dest) nounwind noinline { entry: -; CHECK: movl 36({{%rdi|%rcx}}) -; CHECK-NEXT: movl 48({{%rdi|%rcx}}) -; CHECK: punpcklqdq -; CHECK: movq %xmm0, ({{%rsi|%rdx}}) +; CHECK: t02 +; CHECK: movaps +; CHECK: shufps +; CHECK: pshufd +; CHECK: movq +; CHECK: ret %0 = bitcast <8 x i32>* %source to <4 x i32>* %arrayidx = getelementptr inbounds <4 x i32>* %0, i64 3 %tmp2 = load <4 x i32>* %arrayidx, 
align 16