In the common case where we are shuffling a vector, emit an
LLVM vector shuffle instead of a bunch of insert/extract operations.

For:   vec4 = vec4.yyyy;  // splat

Emit:
        %tmp1 = shufflevector <4 x float> %tmp, <4 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 > 

instead of:

        %tmp1 = extractelement <4 x float> %tmp, i32 1          
        %tmp2 = insertelement <4 x float> undef, float %tmp1, i32 0             
        %tmp3 = extractelement <4 x float> %tmp, i32 1          
        %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1             
        %tmp5 = extractelement <4 x float> %tmp, i32 1          
        %tmp6 = insertelement <4 x float> %tmp4, float %tmp5, i32 2             
        %tmp7 = extractelement <4 x float> %tmp, i32 1          
        %tmp8 = insertelement <4 x float> %tmp6, float %tmp7, i32 3             

llvm-svn: 40779
This commit is contained in:
Chris Lattner 2007-08-03 16:09:33 +00:00
parent 177bd450e0
commit fb837dccac
1 changed file with 19 additions and 2 deletions

View File

@@ -298,14 +298,31 @@ RValue CodeGenFunction::EmitLoadOfLValue(LValue LV, QualType ExprType) {
return RValue::get(Builder.CreateExtractElement(Vec, Elt, "tmp"));
}
// If the source and destination have the same number of elements, use a
// vector shuffle instead of insert/extracts.
unsigned NumResultElts = cast<VectorType>(ExprType)->getNumElements();
unsigned NumSourceElts =
cast<llvm::VectorType>(Vec->getType())->getNumElements();
unsigned NumElts = cast<VectorType>(ExprType)->getNumElements();
if (NumResultElts == NumSourceElts) {
llvm::SmallVector<llvm::Constant*, 4> Mask;
for (unsigned i = 0; i != NumResultElts; ++i) {
unsigned InIdx = OCUVectorComponent::getAccessedFieldNo(i, EncFields);
Mask.push_back(llvm::ConstantInt::get(llvm::Type::Int32Ty, InIdx));
}
llvm::Value *MaskV = llvm::ConstantVector::get(&Mask[0], Mask.size());
Vec = Builder.CreateShuffleVector(Vec,
llvm::UndefValue::get(Vec->getType()),
MaskV, "tmp");
return RValue::get(Vec);
}
// Start out with an undef of the result type.
llvm::Value *Result = llvm::UndefValue::get(ConvertType(ExprType));
// Extract/Insert each element of the result.
for (unsigned i = 0; i != NumElts; ++i) {
for (unsigned i = 0; i != NumResultElts; ++i) {
unsigned InIdx = OCUVectorComponent::getAccessedFieldNo(i, EncFields);
llvm::Value *Elt = llvm::ConstantInt::get(llvm::Type::Int32Ty, InIdx);
Elt = Builder.CreateExtractElement(Vec, Elt, "tmp");