CodeGen: Generate scalar code if vector instructions cannot be generated

This fixes two crashes that occurred when vectorizing:
  - A load of a non-vectorizable type (e.g. float**)
  - An instruction that is not vectorizable (e.g. a call)

llvm-svn: 154586
This commit is contained in:
Tobias Grosser 2012-04-12 10:46:55 +00:00
parent 2fca3c2c87
commit 4cb5461dae
4 changed files with 194 additions and 7 deletions

View File

@ -569,6 +569,12 @@ private:
void copyStore(const StoreInst *Store, ValueMapT &VectorMap,
VectorValueMapT &ScalarMaps);
// Copy Inst as scalar code: the definition calls copyInstScalar once per
// vector lane, using the scalar map and global map of that lane.
void copyInstScalarized(const Instruction *Inst, ValueMapT &VectorMap,
VectorValueMapT &ScalarMaps);
// Emit extractelement instructions for operands of Inst that are looked up
// in VectorMap and record the results in the per-lane ScalarMaps.
// NOTE(review): the returned flag presumably reports whether Inst has at
// least one vector operand -- confirm against the definition.
bool extractScalarValues(const Instruction *Inst, ValueMapT &VectorMap,
VectorValueMapT &ScalarMaps);
bool hasVectorOperands(const Instruction *Inst, ValueMapT &VectorMap);
void copyInstruction(const Instruction *Inst, ValueMapT &VectorMap,
@ -680,18 +686,16 @@ Value *VectorBlockGenerator::generateUnknownStrideLoad(const LoadInst *Load,
void VectorBlockGenerator::generateLoad(const LoadInst *Load,
ValueMapT &VectorMap,
VectorValueMapT &ScalarMaps) {
Value *NewLoad;
if (GroupedUnrolling) {
if (GroupedUnrolling || !VectorType::isValidElementType(Load->getType())) {
for (int i = 0; i < getVectorWidth(); i++)
ScalarMaps[i][Load] = generateScalarLoad(Load, ScalarMaps[i],
MemoryAccess &Access = Statement.getAccessFor(Load);
Value *NewLoad;
if (Access.isStrideZero(isl_set_copy(Domain)))
NewLoad = generateStrideZeroLoad(Load, ScalarMaps[0]);
else if (Access.isStrideOne(isl_set_copy(Domain)))
@ -772,6 +776,63 @@ bool VectorBlockGenerator::hasVectorOperands(const Instruction *Inst,
return false;
/// Extract the per-lane scalar values of all vector operands of Inst.
///
/// For every operand of Inst that has an entry in VectorMap, one
/// extractelement instruction per vector lane is emitted and its result is
/// stored in the scalar map of that lane, so that scalar copies of Inst can
/// use it as operand.
///
/// @param Inst       The instruction whose operands are inspected.
/// @param VectorMap  Map from original values to their vector values.
/// @param ScalarMaps One scalar value map per vector lane; updated in place.
///
/// @return True if at least one operand of Inst was found in VectorMap,
///         false otherwise.
bool VectorBlockGenerator::extractScalarValues(const Instruction *Inst,
                                               ValueMapT &VectorMap,
                                               VectorValueMapT &ScalarMaps) {
  bool HasVectorOperand = false;
  int VectorWidth = getVectorWidth();

  for (Instruction::const_op_iterator OI = Inst->op_begin(),
       OE = Inst->op_end(); OI != OE; ++OI) {
    ValueMapT::iterator VecOp = VectorMap.find(*OI);

    // Operands without a vector value need no extraction. Skipping them also
    // guarantees that the end iterator is never dereferenced below.
    if (VecOp == VectorMap.end())
      continue;

    HasVectorOperand = true;
    Value *NewVector = VecOp->second;

    for (int i = 0; i < VectorWidth; ++i) {
      ValueMapT &SM = ScalarMaps[i];

      // If there is one scalar extracted, all scalar elements should have
      // already been extracted by the code here. So no need to check for the
      // existence of all of them.
      if (SM.count(*OI))
        break;

      SM[*OI] = Builder.CreateExtractElement(NewVector, Builder.getInt32(i));
    }
  }

  return HasVectorOperand;
}
void VectorBlockGenerator::copyInstScalarized(const Instruction *Inst,
ValueMapT &VectorMap,
VectorValueMapT &ScalarMaps) {
bool HasVectorOperand;
int VectorWidth = getVectorWidth();
HasVectorOperand = extractScalarValues(Inst, VectorMap, ScalarMaps);
for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane]);
if (!VectorType::isValidElementType(Inst->getType()) || !HasVectorOperand)
// Make the result available as vector value.
VectorType *VectorType = VectorType::get(Inst->getType(), VectorWidth);
Value *Vector = UndefValue::get(VectorType);
for (int i = 0; i < VectorWidth; i++)
Vector = Builder.CreateInsertElement(Vector, ScalarMaps[i][Inst],
VectorMap[Inst] = Vector;
// Return the vector width, i.e. the number of entries in GlobalMaps.
int VectorBlockGenerator::getVectorWidth() {
return GlobalMaps.size();
@ -805,11 +866,11 @@ void VectorBlockGenerator::copyInstruction(const Instruction *Inst,
llvm_unreachable("Cannot issue vector code for this instruction");
// Fallthrough: We generate scalar instructions, if we don't know how to
// generate vector code.
for (int VectorLane = 0; VectorLane < getVectorWidth(); VectorLane++)
copyInstScalar(Inst, ScalarMaps[VectorLane], GlobalMaps[VectorLane]);
copyInstScalarized(Inst, VectorMap, ScalarMaps);
void VectorBlockGenerator::copyBB() {

View File

@ -0,0 +1,43 @@
; RUN: opt %loadPolly -basicaa -polly-codegen -enable-polly-vector -S %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
@A = common global [1024 x float] zeroinitializer, align 16
@B = common global [1024 x float] zeroinitializer, align 16
declare float @foo(float) readnone
; Loop with four iterations (%indvar = 0..3). Each iteration loads the
; loop-invariant element A[0], passes it to @foo and stores the returned
; float to B[%indvar].
define void @simple_vec_call() nounwind {
entry:
  br label %body

body:
  %indvar = phi i64 [ 0, %entry ], [ %indvar_next, %body ]
  %scevgep = getelementptr [1024 x float]* @B, i64 0, i64 %indvar
  %value = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 0), align 16
  %result = tail call float @foo(float %value) nounwind
  store float %result, float* %scevgep, align 4
  %indvar_next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar_next, 4
  br i1 %exitcond, label %return, label %body

return:
  ret void
}
; CHECK: %value_p_splat_one = load <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8
; CHECK: %value_p_splat = shufflevector <1 x float> %value_p_splat_one, <1 x float> %value_p_splat_one, <4 x i32> zeroinitializer
; CHECK: %0 = extractelement <4 x float> %value_p_splat, i32 0
; CHECK: %1 = extractelement <4 x float> %value_p_splat, i32 1
; CHECK: %2 = extractelement <4 x float> %value_p_splat, i32 2
; CHECK: %3 = extractelement <4 x float> %value_p_splat, i32 3
; CHECK: %p_result = tail call float @foo(float %0) nounwind
; CHECK: %p_result4 = tail call float @foo(float %1) nounwind
; CHECK: %p_result5 = tail call float @foo(float %2) nounwind
; CHECK: %p_result6 = tail call float @foo(float %3) nounwind
; CHECK: %4 = insertelement <4 x float> undef, float %p_result, i32 0
; CHECK: %5 = insertelement <4 x float> %4, float %p_result4, i32 1
; CHECK: %6 = insertelement <4 x float> %5, float %p_result5, i32 2
; CHECK: %7 = insertelement <4 x float> %6, float %p_result6, i32 3
; CHECK: %vector_ptr = bitcast float* %p_scevgep to <4 x float>*
; CHECK: store <4 x float> %7, <4 x float>* %vector_ptr, align 8

View File

@ -0,0 +1,45 @@
; RUN: opt %loadPolly -basicaa -polly-codegen -enable-polly-vector -dce -S %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
@A = common global [1024 x float] zeroinitializer, align 16
@B = common global [1024 x float**] zeroinitializer, align 16
declare float** @foo(float) readnone
; Loop with four iterations (%indvar = 0..3). Each iteration loads the
; loop-invariant element A[0], passes it to @foo and stores the returned
; float** to B[%indvar]. The result type is a pointer, so the expected
; output below keeps the calls and stores scalar.
define void @simple_vec_call() nounwind {
entry:
  br label %body

body:
  %indvar = phi i64 [ 0, %entry ], [ %indvar_next, %body ]
  %scevgep = getelementptr [1024 x float**]* @B, i64 0, i64 %indvar
  %value = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 0), align 16
  %result = tail call float** @foo(float %value) nounwind
  store float** %result, float*** %scevgep, align 4
  %indvar_next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar_next, 4
  br i1 %exitcond, label %return, label %body

return:
  ret void
}
; CHECK: %p_scevgep = getelementptr [1024 x float**]* @B, i64 0, i64 0
; CHECK: %p_scevgep1 = getelementptr [1024 x float**]* @B, i64 0, i64 1
; CHECK: %p_scevgep2 = getelementptr [1024 x float**]* @B, i64 0, i64 2
; CHECK: %p_scevgep3 = getelementptr [1024 x float**]* @B, i64 0, i64 3
; CHECK: %value_p_splat_one = load <1 x float>* bitcast ([1024 x float]* @A to <1 x float>*), align 8
; CHECK: %value_p_splat = shufflevector <1 x float> %value_p_splat_one, <1 x float> %value_p_splat_one, <4 x i32> zeroinitializer
; CHECK: %0 = extractelement <4 x float> %value_p_splat, i32 0
; CHECK: %1 = extractelement <4 x float> %value_p_splat, i32 1
; CHECK: %2 = extractelement <4 x float> %value_p_splat, i32 2
; CHECK: %3 = extractelement <4 x float> %value_p_splat, i32 3
; CHECK: %p_result = tail call float** @foo(float %0) nounwind
; CHECK: %p_result4 = tail call float** @foo(float %1) nounwind
; CHECK: %p_result5 = tail call float** @foo(float %2) nounwind
; CHECK: %p_result6 = tail call float** @foo(float %3) nounwind
; CHECK: store float** %p_result, float*** %p_scevgep, align 4
; CHECK: store float** %p_result4, float*** %p_scevgep1, align 4
; CHECK: store float** %p_result5, float*** %p_scevgep2, align 4
; CHECK: store float** %p_result6, float*** %p_scevgep3, align 4

View File

@ -0,0 +1,38 @@
; RUN: opt %loadPolly -basicaa -polly-codegen -enable-polly-vector -S %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
@A = common global [1024 x float**] zeroinitializer, align 16
@B = common global [1024 x float**] zeroinitializer, align 16
declare float @foo(float) readnone
; Loop with four iterations (%indvar = 0..3). Each iteration loads the
; loop-invariant float** element A[0] and stores it to B[%indvar]. The
; loaded type is a pointer, so the expected output below uses one scalar
; load per lane.
define void @simple_vec_call() nounwind {
entry:
  br label %body

body:
  %indvar = phi i64 [ 0, %entry ], [ %indvar_next, %body ]
  %scevgep = getelementptr [1024 x float**]* @B, i64 0, i64 %indvar
  %value = load float*** getelementptr inbounds ([1024 x float**]* @A, i64 0, i64 0), align 16
  store float** %value, float*** %scevgep, align 4
  %indvar_next = add i64 %indvar, 1
  %exitcond = icmp eq i64 %indvar_next, 4
  br i1 %exitcond, label %return, label %body

return:
  ret void
}
; CHECK: %p_scevgep = getelementptr [1024 x float**]* @B, i64 0, i64 0
; CHECK: %p_scevgep1 = getelementptr [1024 x float**]* @B, i64 0, i64 1
; CHECK: %p_scevgep2 = getelementptr [1024 x float**]* @B, i64 0, i64 2
; CHECK: %p_scevgep3 = getelementptr [1024 x float**]* @B, i64 0, i64 3
; CHECK: %value_p_scalar_ = load float*** getelementptr inbounds ([1024 x float**]* @A, i64 0, i64 0)
; CHECK: %value_p_scalar_4 = load float*** getelementptr inbounds ([1024 x float**]* @A, i64 0, i64 0)
; CHECK: %value_p_scalar_5 = load float*** getelementptr inbounds ([1024 x float**]* @A, i64 0, i64 0)
; CHECK: %value_p_scalar_6 = load float*** getelementptr inbounds ([1024 x float**]* @A, i64 0, i64 0)
; CHECK: store float** %value_p_scalar_, float*** %p_scevgep, align 4
; CHECK: store float** %value_p_scalar_4, float*** %p_scevgep1, align 4
; CHECK: store float** %value_p_scalar_5, float*** %p_scevgep2, align 4
; CHECK: store float** %value_p_scalar_6, float*** %p_scevgep3, align 4