From fad2761ca0924216c9296cbdbaefc81b58067128 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Fri, 7 Mar 2014 10:24:44 +0000 Subject: [PATCH] InstCombine: form shuffles from wider range of insert/extractelements Sequences of insertelement/extractelements are sometimes used to build vectorsr; this code tries to put them back together into shuffles, but could only produce a completely uniform shuffle types ( from two sources). This should allow shuffles with different numbers of elements on the input and output sides as well. llvm-svn: 203229 --- llvm/lib/Target/AArch64/AArch64InstrNEON.td | 29 +++++ .../InstCombine/InstCombineVectorOps.cpp | 119 ++++++++++-------- .../InstCombine/insert-extract-shuffle.ll | 37 ++++++ 3 files changed, 136 insertions(+), 49 deletions(-) create mode 100644 llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll diff --git a/llvm/lib/Target/AArch64/AArch64InstrNEON.td b/llvm/lib/Target/AArch64/AArch64InstrNEON.td index 233f4046975a..3b919b388b2c 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrNEON.td +++ b/llvm/lib/Target/AArch64/AArch64InstrNEON.td @@ -3321,6 +3321,11 @@ multiclass NeonI_3VDL2_2Op_mull_v3 opcode, string asmop, (i64 (vector_extract (v2i64 VPR128:$Rm), 1)))))))], NoItinerary>; } + + def : Pat<(v16i8 (int_aarch64_neon_vmull_p64 + (v1i64 (extract_subvector (v2i64 VPR128:$Rn), (i64 1))), + (v1i64 (extract_subvector (v2i64 VPR128:$Rm), (i64 1))))), + (!cast(NAME # "_1q2d") VPR128:$Rn, VPR128:$Rm)>; } defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2", "NI_pmull_hi", @@ -5878,12 +5883,21 @@ multiclass Neon_ScalarXIndexedElem_MUL_Patterns< (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))))), (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>; + def : Pat<(ResTy (opnode (OpVTy FPRC:$Rn), + (OpVTy (extract_subvector (VecOpTy VPRC:$MRm), OpImm:$Imm)))), + (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>; + //swapped operands def : Pat<(ResTy (opnode (OpVTy (scalar_to_vector (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))), (OpVTy FPRC:$Rn))), (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>; + + def : Pat<(ResTy (opnode + (OpVTy (extract_subvector (VecOpTy VPRC:$MRm), OpImm:$Imm)), + (OpVTy FPRC:$Rn))), + (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>; } @@ -5975,6 +5989,13 @@ multiclass Neon_ScalarXIndexedElem_MLAL_Patterns< (ResTy (INST (ResTy ResFPRC:$Ra), (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>; + def : Pat<(ResTy (opnode + (ResTy ResFPRC:$Ra), + (ResTy (coreopnode (OpTy FPRC:$Rn), + (OpTy (extract_subvector (OpVTy VPRC:$MRm), OpImm:$Imm)))))), + (ResTy (INST (ResTy ResFPRC:$Ra), + (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>; + // swapped operands def : Pat<(ResTy (opnode (ResTy ResFPRC:$Ra), @@ -5984,6 +6005,14 @@ multiclass Neon_ScalarXIndexedElem_MLAL_Patterns< (OpTy FPRC:$Rn))))), (ResTy (INST (ResTy ResFPRC:$Ra), (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>; + + def : Pat<(ResTy (opnode + (ResTy ResFPRC:$Ra), + (ResTy (coreopnode + (OpTy (extract_subvector (OpVTy VPRC:$MRm), OpImm:$Imm)), + (OpTy FPRC:$Rn))))), + (ResTy (INST (ResTy ResFPRC:$Ra), + (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>; } // Patterns for Scalar Signed saturating diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 14ba487c9295..9089372dfef4 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -326,7 +326,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { /// Otherwise, return false. static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, SmallVectorImpl &Mask) { - assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() && + assert(LHS->getType() == RHS->getType() && "Invalid CollectSingleShuffleElements"); unsigned NumElts = V->getType()->getVectorNumElements(); @@ -367,10 +367,10 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, return true; } } else if (ExtractElementInst *EI = dyn_cast(ScalarOp)){ - if (isa(EI->getOperand(1)) && - EI->getOperand(0)->getType() == V->getType()) { + if (isa(EI->getOperand(1))) { unsigned ExtractedIdx = cast(EI->getOperand(1))->getZExtValue(); + unsigned NumLHSElts = LHS->getType()->getVectorNumElements(); // This must be extracting from either LHS or RHS. if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) { @@ -386,7 +386,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, assert(EI->getOperand(0) == RHS); Mask[InsertedIdx % NumElts] = ConstantInt::get(Type::getInt32Ty(V->getContext()), - ExtractedIdx+NumElts); + ExtractedIdx + NumLHSElts); } return true; } @@ -394,29 +394,36 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, } } } - // TODO: Handle shufflevector here! return false; } -/// CollectShuffleElements - We are building a shuffle of V, using RHS as the -/// RHS of the shuffle instruction, if it is not null. Return a shuffle mask -/// that computes V and the LHS value of the shuffle. -static Value *CollectShuffleElements(Value *V, SmallVectorImpl &Mask, - Value *&RHS) { - assert(V->getType()->isVectorTy() && - (RHS == 0 || V->getType() == RHS->getType()) && - "Invalid shuffle!"); + +/// We are building a shuffle to create V, which is a sequence of insertelement, +/// extractelement pairs. If PermittedRHS is set, then we must either use it or +/// not rely on the second vector source. Return an std::pair containing the +/// left and right vectors of the proposed shuffle (or 0), and set the Mask +/// parameter as required. +/// +/// Note: we intentionally don't try to fold earlier shuffles since they have +/// often been chosen carefully to be efficiently implementable on the target. +typedef std::pair ShuffleOps; + +static ShuffleOps CollectShuffleElements(Value *V, + SmallVectorImpl &Mask, + Value *PermittedRHS) { + assert(V->getType()->isVectorTy() && "Invalid shuffle!"); unsigned NumElts = cast(V->getType())->getNumElements(); if (isa(V)) { Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); - return V; + return std::make_pair( + PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr); } if (isa(V)) { Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0)); - return V; + return std::make_pair(V, nullptr); } if (InsertElementInst *IEI = dyn_cast(V)) { @@ -426,51 +433,59 @@ static Value *CollectShuffleElements(Value *V, SmallVectorImpl &Mask, Value *IdxOp = IEI->getOperand(2); if (ExtractElementInst *EI = dyn_cast(ScalarOp)) { - if (isa(EI->getOperand(1)) && isa(IdxOp) && - EI->getOperand(0)->getType() == V->getType()) { + if (isa(EI->getOperand(1)) && isa(IdxOp)) { unsigned ExtractedIdx = cast(EI->getOperand(1))->getZExtValue(); unsigned InsertedIdx = cast(IdxOp)->getZExtValue(); // Either the extracted from or inserted into vector must be RHSVec, // otherwise we'd end up with a shuffle of three inputs. - if (EI->getOperand(0) == RHS || RHS == 0) { - RHS = EI->getOperand(0); - Value *V = CollectShuffleElements(VecOp, Mask, RHS); + if (EI->getOperand(0) == PermittedRHS || PermittedRHS == 0) { + Value *RHS = EI->getOperand(0); + ShuffleOps LR = CollectShuffleElements(VecOp, Mask, RHS); + assert(LR.second == 0 || LR.second == RHS); + + if (LR.first->getType() != RHS->getType()) { + // We tried our best, but we can't find anything compatible with RHS + // further up the chain. Return a trivial shuffle. + for (unsigned i = 0; i < NumElts; ++i) + Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()), i); + return std::make_pair(V, nullptr); + } + + unsigned NumLHSElts = RHS->getType()->getVectorNumElements(); Mask[InsertedIdx % NumElts] = ConstantInt::get(Type::getInt32Ty(V->getContext()), - NumElts+ExtractedIdx); - return V; + NumLHSElts+ExtractedIdx); + return std::make_pair(LR.first, RHS); } - if (VecOp == RHS) { - Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS); - // Update Mask to reflect that `ScalarOp' has been inserted at - // position `InsertedIdx' within the vector returned by IEI. - Mask[InsertedIdx % NumElts] = Mask[ExtractedIdx]; - - // Everything but the extracted element is replaced with the RHS. - for (unsigned i = 0; i != NumElts; ++i) { - if (i != InsertedIdx) - Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()), - NumElts+i); - } - return V; + if (VecOp == PermittedRHS) { + // We've gone as far as we can: anything on the other side of the + // extractelement will already have been converted into a shuffle. + unsigned NumLHSElts = + EI->getOperand(0)->getType()->getVectorNumElements(); + for (unsigned i = 0; i != NumElts; ++i) + Mask.push_back(ConstantInt::get( + Type::getInt32Ty(V->getContext()), + i == InsertedIdx ? ExtractedIdx : NumLHSElts + i)); + return std::make_pair(EI->getOperand(0), PermittedRHS); } // If this insertelement is a chain that comes from exactly these two // vectors, return the vector and the effective shuffle. - if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask)) - return EI->getOperand(0); + if (EI->getOperand(0)->getType() == PermittedRHS->getType() && + CollectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS, + Mask)) + return std::make_pair(EI->getOperand(0), PermittedRHS); } } } - // TODO: Handle shufflevector here! // Otherwise, can't do anything fancy. Return an identity vector. for (unsigned i = 0; i != NumElts; ++i) Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); - return V; + return std::make_pair(V, nullptr); } Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { @@ -485,17 +500,18 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { // If the inserted element was extracted from some other vector, and if the // indexes are constant, try to turn this into a shufflevector operation. if (ExtractElementInst *EI = dyn_cast(ScalarOp)) { - if (isa(EI->getOperand(1)) && isa(IdxOp) && - EI->getOperand(0)->getType() == IE.getType()) { - unsigned NumVectorElts = IE.getType()->getNumElements(); + if (isa(EI->getOperand(1)) && isa(IdxOp)) { + unsigned NumInsertVectorElts = IE.getType()->getNumElements(); + unsigned NumExtractVectorElts = + EI->getOperand(0)->getType()->getVectorNumElements(); unsigned ExtractedIdx = cast(EI->getOperand(1))->getZExtValue(); unsigned InsertedIdx = cast(IdxOp)->getZExtValue(); - if (ExtractedIdx >= NumVectorElts) // Out of range extract. + if (ExtractedIdx >= NumExtractVectorElts) // Out of range extract. return ReplaceInstUsesWith(IE, VecOp); - if (InsertedIdx >= NumVectorElts) // Out of range insert. + if (InsertedIdx >= NumInsertVectorElts) // Out of range insert. return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType())); // If we are extracting a value from a vector, then inserting it right @@ -507,11 +523,16 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { // (and any insertelements it points to), into one big shuffle. if (!IE.hasOneUse() || !isa(IE.use_back())) { SmallVector Mask; - Value *RHS = 0; - Value *LHS = CollectShuffleElements(&IE, Mask, RHS); - if (RHS == 0) RHS = UndefValue::get(LHS->getType()); - // We now have a shuffle of LHS, RHS, Mask. - return new ShuffleVectorInst(LHS, RHS, ConstantVector::get(Mask)); + ShuffleOps LR = CollectShuffleElements(&IE, Mask, 0); + + // The proposed shuffle may be trivial, in which case we shouldn't + // perform the combine. + if (LR.first != &IE && LR.second != &IE) { + // We now have a shuffle of LHS, RHS, Mask. + if (LR.second == 0) LR.second = UndefValue::get(LR.first->getType()); + return new ShuffleVectorInst(LR.first, LR.second, + ConstantVector::get(Mask)); + } } } } diff --git a/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll b/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll new file mode 100644 index 000000000000..8929c82def7b --- /dev/null +++ b/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll @@ -0,0 +1,37 @@ +; RUN: opt -S -instcombine %s | FileCheck %s + +define <1 x i8> @test1(<8 x i8> %in) { +; CHECK-LABEL: @test1 +; CHECK: shufflevector <8 x i8> %in, <8 x i8> undef, <1 x i32> + %val = extractelement <8 x i8> %in, i32 5 + %vec = insertelement <1 x i8> undef, i8 %val, i32 0 + ret <1 x i8> %vec +} + +define <4 x i16> @test2(<8 x i16> %in, <8 x i16> %in2) { +; CHECK-LABEL: @test2 +; CHECK: shufflevector <8 x i16> %in2, <8 x i16> %in, <4 x i32> + %elt0 = extractelement <8 x i16> %in, i32 3 + %elt1 = extractelement <8 x i16> %in, i32 1 + %elt2 = extractelement <8 x i16> %in2, i32 0 + %elt3 = extractelement <8 x i16> %in, i32 2 + + %vec.0 = insertelement <4 x i16> undef, i16 %elt0, i32 0 + %vec.1 = insertelement <4 x i16> %vec.0, i16 %elt1, i32 1 + %vec.2 = insertelement <4 x i16> %vec.1, i16 %elt2, i32 2 + %vec.3 = insertelement <4 x i16> %vec.2, i16 %elt3, i32 3 + + ret <4 x i16> %vec.3 +} + +define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) #0 { +; CHECK-LABEL: @test_vcopyq_lane_p64 +; CHECK: extractelement +; CHECK: insertelement +; CHECK-NOT: shufflevector +entry: + %elt = extractelement <1 x i64> %b, i32 0 + %res = insertelement <2 x i64> %a, i64 %elt, i32 1 + ret <2 x i64> %res +} +