Include the use kind along with the expression in the key of the

use sharing map. The reconcileNewOffset logic already forces a
separate use if the kinds differ, so incorporating the kind in the
key means we can track more sharing opportunities.

More sharing means fewer total uses to track, which means smaller
problem sizes, which means the conservative throttles don't kick
in as often.

llvm-svn: 106396
This commit is contained in:
Dan Gohman 2010-06-19 21:29:59 +00:00
parent 297fb8b9fc
commit 51d00092b6
2 changed files with 337 additions and 2 deletions

View File

@ -1207,6 +1207,30 @@ static bool isAlwaysFoldable(const SCEV *S,
namespace {
/// UseMapDenseMapInfo - A DenseMapInfo implementation for holding
/// DenseMaps and DenseSets of pairs of const SCEV* and LSRUse::Kind.
struct UseMapDenseMapInfo {
static std::pair<const SCEV *, LSRUse::KindType> getEmptyKey() {
return std::make_pair(reinterpret_cast<const SCEV *>(-1), LSRUse::Basic);
}
static std::pair<const SCEV *, LSRUse::KindType> getTombstoneKey() {
return std::make_pair(reinterpret_cast<const SCEV *>(-2), LSRUse::Basic);
}
static unsigned
getHashValue(const std::pair<const SCEV *, LSRUse::KindType> &V) {
unsigned Result = DenseMapInfo<const SCEV *>::getHashValue(V.first);
Result ^= DenseMapInfo<unsigned>::getHashValue(unsigned(V.second));
return Result;
}
static bool isEqual(const std::pair<const SCEV *, LSRUse::KindType> &LHS,
const std::pair<const SCEV *, LSRUse::KindType> &RHS) {
return LHS == RHS;
}
};
/// FormulaSorter - This class implements an ordering for formulae which sorts
/// them by their standalone cost.
class FormulaSorter {
@ -1279,7 +1303,9 @@ class LSRInstance {
}
// Support for sharing of LSRUses between LSRFixups.
typedef DenseMap<const SCEV *, size_t> UseMapTy;
typedef DenseMap<std::pair<const SCEV *, LSRUse::KindType>,
size_t,
UseMapDenseMapInfo> UseMapTy;
UseMapTy UseMap;
bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
@ -1837,7 +1863,7 @@ LSRInstance::getUse(const SCEV *&Expr,
}
std::pair<UseMapTy::iterator, bool> P =
UseMap.insert(std::make_pair(Expr, 0));
UseMap.insert(std::make_pair(std::make_pair(Expr, Kind), 0));
if (!P.second) {
// A use already existed with this base.
size_t LUIdx = P.first->second;

View File

@ -440,3 +440,312 @@ bb5: ; preds = %bb3, %entry
%s.1.lcssa = phi i32 [ 0, %entry ], [ %s.0.lcssa, %bb3 ] ; <i32> [#uses=1]
ret i32 %s.1.lcssa
}
; Two loops here are of particular interest; the one at %bb21, where
; we don't want to leave extra induction variables around, or use an
; lea to compute an exit condition inside the loop:
; CHECK: test:
; CHECK: BB10_4:
; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
; CHECK-NEXT: addss %xmm{{.*}}, %xmm{{.*}}
; CHECK-NEXT: mulss (%r{{[^,]*}}), %xmm{{.*}}
; CHECK-NEXT: movss %xmm{{.*}}, (%r{{[^,]*}})
; CHECK-NEXT: addq $4, %r{{.*}}
; CHECK-NEXT: decq %r{{.*}}
; CHECK-NEXT: addq $4, %r{{.*}}
; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
; CHECK-NEXT: BB10_2:
; CHECK-NEXT: testq %r{{.*}}, %r{{.*}}
; CHECK-NEXT: jle
; CHECK-NEXT: testb $15, %r{{.*}}
; CHECK-NEXT: jne
; And the one at %bb68, where we want to be sure to use superhero mode:
; CHECK: BB10_10:
; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
; CHECK-NEXT: mulps 48(%r{{[^,]*}}), %xmm{{.*}}
; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
; CHECK-NEXT: mulps 32(%r{{[^,]*}}), %xmm{{.*}}
; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
; CHECK-NEXT: mulps 16(%r{{[^,]*}}), %xmm{{.*}}
; CHECK-NEXT: movaps %xmm{{.*}}, %xmm{{.*}}
; CHECK-NEXT: mulps (%r{{[^,]*}}), %xmm{{.*}}
; CHECK-NEXT: movaps %xmm{{.*}}, (%r{{[^,]*}})
; CHECK-NEXT: movaps %xmm{{.*}}, 16(%r{{[^,]*}})
; CHECK-NEXT: movaps %xmm{{.*}}, 32(%r{{[^,]*}})
; CHECK-NEXT: movaps %xmm{{.*}}, 48(%r{{[^,]*}})
; CHECK-NEXT: addps %xmm{{.*}}, %xmm{{.*}}
; CHECK-NEXT: addps %xmm{{.*}}, %xmm{{.*}}
; CHECK-NEXT: addps %xmm{{.*}}, %xmm{{.*}}
; CHECK-NEXT: addps %xmm{{.*}}, %xmm{{.*}}
; CHECK-NEXT: addq $64, %r{{.*}}
; CHECK-NEXT: addq $64, %r{{.*}}
; CHECK-NEXT: addq $-16, %r{{.*}}
; CHECK-NEXT: BB10_11:
; CHECK-NEXT: cmpq $15, %r{{.*}}
; CHECK-NEXT: jg
; @test - Multiplies the floats at %arg by a running value and stores each
; product to %arg4, for the %arg6 elements. The running value starts at the
; float loaded from %arg2 and is advanced by the float loaded from %arg3 (%t8);
; its final value is stored back to %arg2. The function returns immediately
; unless the low 32 bits of both (%arg1 xor 1) and (%arg5 xor 1) are zero.
; Structure: a scalar loop (%bb21), one of two 16-floats-per-iteration vector
; loops (%bb68 or %bb137), then a scalar tail loop (%bb201).
define void @test(float* %arg, i64 %arg1, float* nocapture %arg2, float* nocapture %arg3, float* %arg4, i64 %arg5, i64 %arg6) nounwind {
; Entry: load the per-element step (%t8), record in %t13 whether %arg and %arg4
; agree in their low four address bits, and test %arg1/%arg5 (%t18).
bb:
%t = alloca float, align 4 ; <float*> [#uses=3]
%t7 = alloca float, align 4 ; <float*> [#uses=2]
%t8 = load float* %arg3 ; <float> [#uses=8]
%t9 = ptrtoint float* %arg to i64 ; <i64> [#uses=1]
%t10 = ptrtoint float* %arg4 to i64 ; <i64> [#uses=1]
%t11 = xor i64 %t10, %t9 ; <i64> [#uses=1]
%t12 = and i64 %t11, 15 ; <i64> [#uses=1]
%t13 = icmp eq i64 %t12, 0 ; <i1> [#uses=1]
%t14 = xor i64 %arg1, 1 ; <i64> [#uses=1]
%t15 = xor i64 %arg5, 1 ; <i64> [#uses=1]
%t16 = or i64 %t15, %t14 ; <i64> [#uses=1]
%t17 = trunc i64 %t16 to i32 ; <i32> [#uses=1]
%t18 = icmp eq i32 %t17, 0 ; <i1> [#uses=1]
br i1 %t18, label %bb19, label %bb213
; Load the initial running value from %arg2.
bb19: ; preds = %bb
%t20 = load float* %arg2 ; <float> [#uses=1]
br label %bb21
; Scalar loop header: %t22 is the element index, %t23 the running value and
; %t24 the number of elements remaining. Leaves for %bb37 once no elements
; remain (%t24 <= 0).
bb21: ; preds = %bb32, %bb19
%t22 = phi i64 [ %t36, %bb32 ], [ 0, %bb19 ] ; <i64> [#uses=21]
%t23 = phi float [ %t35, %bb32 ], [ %t20, %bb19 ] ; <float> [#uses=6]
%t24 = sub i64 %arg6, %t22 ; <i64> [#uses=4]
%t25 = getelementptr float* %arg4, i64 %t22 ; <float*> [#uses=4]
%t26 = getelementptr float* %arg, i64 %t22 ; <float*> [#uses=3]
%t27 = icmp sgt i64 %t24, 0 ; <i1> [#uses=1]
br i1 %t27, label %bb28, label %bb37
; Also leave the scalar loop once the destination address %t25 has its low
; four bits clear.
bb28: ; preds = %bb21
%t29 = ptrtoint float* %t25 to i64 ; <i64> [#uses=1]
%t30 = and i64 %t29, 15 ; <i64> [#uses=1]
%t31 = icmp eq i64 %t30, 0 ; <i1> [#uses=1]
br i1 %t31, label %bb37, label %bb32
; Scalar loop body: store %t23 * arg[%t22] to arg4[%t22], then advance the
; running value by %t8 and the index by one.
bb32: ; preds = %bb28
%t33 = load float* %t26 ; <float> [#uses=1]
%t34 = fmul float %t23, %t33 ; <float> [#uses=1]
store float %t34, float* %t25
%t35 = fadd float %t23, %t8 ; <float> [#uses=1]
%t36 = add i64 %t22, 1 ; <i64> [#uses=1]
br label %bb21
; Vector setup: write 4*%t8 and 16*%t8 to the stack slots %t and %t7, build
; %t50 = <%t23 + 0*%t8, %t23 + %t8, %t23 + 2*%t8, %t23 + 3*%t8>, and read the two
; slots back through inline asm (movss followed by pshufd $0) into %t51 and
; %t55. %t52, %t53 and %t54 are %t50 advanced by one, two and three times %t51.
; %t56 records whether more than 15 elements remain; %t13 selects the loop.
bb37: ; preds = %bb28, %bb21
%t38 = fmul float %t8, 4.000000e+00 ; <float> [#uses=1]
store float %t38, float* %t
%t39 = fmul float %t8, 1.600000e+01 ; <float> [#uses=1]
store float %t39, float* %t7
%t40 = fmul float %t8, 0.000000e+00 ; <float> [#uses=1]
%t41 = fadd float %t23, %t40 ; <float> [#uses=1]
%t42 = insertelement <4 x float> undef, float %t41, i32 0 ; <<4 x float>> [#uses=1]
%t43 = fadd float %t23, %t8 ; <float> [#uses=1]
%t44 = insertelement <4 x float> %t42, float %t43, i32 1 ; <<4 x float>> [#uses=1]
%t45 = fmul float %t8, 2.000000e+00 ; <float> [#uses=1]
%t46 = fadd float %t23, %t45 ; <float> [#uses=1]
%t47 = insertelement <4 x float> %t44, float %t46, i32 2 ; <<4 x float>> [#uses=1]
%t48 = fmul float %t8, 3.000000e+00 ; <float> [#uses=1]
%t49 = fadd float %t23, %t48 ; <float> [#uses=1]
%t50 = insertelement <4 x float> %t47, float %t49, i32 3 ; <<4 x float>> [#uses=5]
%t51 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %t) nounwind ; <<4 x float>> [#uses=3]
%t52 = fadd <4 x float> %t50, %t51 ; <<4 x float>> [#uses=3]
%t53 = fadd <4 x float> %t52, %t51 ; <<4 x float>> [#uses=3]
%t54 = fadd <4 x float> %t53, %t51 ; <<4 x float>> [#uses=2]
%t55 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %t7) nounwind ; <<4 x float>> [#uses=8]
%t56 = icmp sgt i64 %t24, 15 ; <i1> [#uses=2]
br i1 %t13, label %bb57, label %bb118
; %t13 was true: run the %bb68 loop only if more than 15 elements remain.
bb57: ; preds = %bb37
br i1 %t56, label %bb61, label %bb112
; Exit of the %bb68 loop: form the source and destination pointers at index
; %t78 for the scalar tail.
bb58: ; preds = %bb68
%t59 = getelementptr float* %arg, i64 %t78 ; <float*> [#uses=1]
%t60 = getelementptr float* %arg4, i64 %t78 ; <float*> [#uses=1]
br label %bb112
; Preheader of the %bb68 loop: loop-invariant index offsets (%t22 + 16/4/8/12)
; and the starting value %arg6 - 16 - %t22 for the remaining-count test.
bb61: ; preds = %bb57
%t62 = add i64 %t22, 16 ; <i64> [#uses=1]
%t63 = add i64 %t22, 4 ; <i64> [#uses=1]
%t64 = add i64 %t22, 8 ; <i64> [#uses=1]
%t65 = add i64 %t22, 12 ; <i64> [#uses=1]
%t66 = add i64 %arg6, -16 ; <i64> [#uses=1]
%t67 = sub i64 %t66, %t22 ; <i64> [#uses=1]
br label %bb68
; Vector loop, 16 floats per iteration (%t74 = 16 * iteration count %t69).
; Four <4 x float> loads from %arg at index offsets 0/4/8/12 are multiplied by
; the running vectors %t71/%t73/%t72/%t70 and stored to %arg4 at the same
; offsets. Each running vector is then advanced by %t55. The loop repeats
; while the remaining count %t97 is greater than 15.
bb68: ; preds = %bb68, %bb61
%t69 = phi i64 [ 0, %bb61 ], [ %t111, %bb68 ] ; <i64> [#uses=3]
%t70 = phi <4 x float> [ %t54, %bb61 ], [ %t107, %bb68 ] ; <<4 x float>> [#uses=2]
%t71 = phi <4 x float> [ %t50, %bb61 ], [ %t103, %bb68 ] ; <<4 x float>> [#uses=2]
%t72 = phi <4 x float> [ %t53, %bb61 ], [ %t108, %bb68 ] ; <<4 x float>> [#uses=2]
%t73 = phi <4 x float> [ %t52, %bb61 ], [ %t109, %bb68 ] ; <<4 x float>> [#uses=2]
%t74 = shl i64 %t69, 4 ; <i64> [#uses=5]
%t75 = add i64 %t22, %t74 ; <i64> [#uses=2]
%t76 = getelementptr float* %arg, i64 %t75 ; <float*> [#uses=1]
%t77 = bitcast float* %t76 to <4 x float>* ; <<4 x float>*> [#uses=1]
%t78 = add i64 %t62, %t74 ; <i64> [#uses=2]
%t79 = add i64 %t63, %t74 ; <i64> [#uses=2]
%t80 = getelementptr float* %arg, i64 %t79 ; <float*> [#uses=1]
%t81 = bitcast float* %t80 to <4 x float>* ; <<4 x float>*> [#uses=1]
%t82 = add i64 %t64, %t74 ; <i64> [#uses=2]
%t83 = getelementptr float* %arg, i64 %t82 ; <float*> [#uses=1]
%t84 = bitcast float* %t83 to <4 x float>* ; <<4 x float>*> [#uses=1]
%t85 = add i64 %t65, %t74 ; <i64> [#uses=2]
%t86 = getelementptr float* %arg, i64 %t85 ; <float*> [#uses=1]
%t87 = bitcast float* %t86 to <4 x float>* ; <<4 x float>*> [#uses=1]
%t88 = getelementptr float* %arg4, i64 %t75 ; <float*> [#uses=1]
%t89 = bitcast float* %t88 to <4 x float>* ; <<4 x float>*> [#uses=1]
%t90 = getelementptr float* %arg4, i64 %t79 ; <float*> [#uses=1]
%t91 = bitcast float* %t90 to <4 x float>* ; <<4 x float>*> [#uses=1]
%t92 = getelementptr float* %arg4, i64 %t82 ; <float*> [#uses=1]
%t93 = bitcast float* %t92 to <4 x float>* ; <<4 x float>*> [#uses=1]
%t94 = getelementptr float* %arg4, i64 %t85 ; <float*> [#uses=1]
%t95 = bitcast float* %t94 to <4 x float>* ; <<4 x float>*> [#uses=1]
%t96 = mul i64 %t69, -16 ; <i64> [#uses=1]
%t97 = add i64 %t67, %t96 ; <i64> [#uses=2]
%t98 = load <4 x float>* %t77 ; <<4 x float>> [#uses=1]
%t99 = load <4 x float>* %t81 ; <<4 x float>> [#uses=1]
%t100 = load <4 x float>* %t84 ; <<4 x float>> [#uses=1]
%t101 = load <4 x float>* %t87 ; <<4 x float>> [#uses=1]
%t102 = fmul <4 x float> %t98, %t71 ; <<4 x float>> [#uses=1]
%t103 = fadd <4 x float> %t71, %t55 ; <<4 x float>> [#uses=2]
%t104 = fmul <4 x float> %t99, %t73 ; <<4 x float>> [#uses=1]
%t105 = fmul <4 x float> %t100, %t72 ; <<4 x float>> [#uses=1]
%t106 = fmul <4 x float> %t101, %t70 ; <<4 x float>> [#uses=1]
store <4 x float> %t102, <4 x float>* %t89
store <4 x float> %t104, <4 x float>* %t91
store <4 x float> %t105, <4 x float>* %t93
store <4 x float> %t106, <4 x float>* %t95
%t107 = fadd <4 x float> %t70, %t55 ; <<4 x float>> [#uses=1]
%t108 = fadd <4 x float> %t72, %t55 ; <<4 x float>> [#uses=1]
%t109 = fadd <4 x float> %t73, %t55 ; <<4 x float>> [#uses=1]
%t110 = icmp sgt i64 %t97, 15 ; <i1> [#uses=1]
%t111 = add i64 %t69, 1 ; <i64> [#uses=1]
br i1 %t110, label %bb68, label %bb58
; Merge of the %bb68 loop exit with the path from %bb57 that skipped it. The
; result of the inline asm call here (%t117) is unused.
bb112: ; preds = %bb58, %bb57
%t113 = phi float* [ %t59, %bb58 ], [ %t26, %bb57 ] ; <float*> [#uses=1]
%t114 = phi float* [ %t60, %bb58 ], [ %t25, %bb57 ] ; <float*> [#uses=1]
%t115 = phi <4 x float> [ %t103, %bb58 ], [ %t50, %bb57 ] ; <<4 x float>> [#uses=1]
%t116 = phi i64 [ %t97, %bb58 ], [ %t24, %bb57 ] ; <i64> [#uses=1]
%t117 = call <4 x float> asm "movss $1, $0\09\0Apshufd $$0, $0, $0", "=x,*m,~{dirflag},~{fpsr},~{flags}"(float* %t) nounwind ; <<4 x float>> [#uses=0]
br label %bb194
; %t13 was false: run the %bb137 loop only if more than 15 elements remain.
bb118: ; preds = %bb37
br i1 %t56, label %bb122, label %bb194
; Exit of the %bb137 loop: form the source and destination pointers at index
; %t145 for the scalar tail.
bb119: ; preds = %bb137
%t120 = getelementptr float* %arg, i64 %t145 ; <float*> [#uses=1]
%t121 = getelementptr float* %arg4, i64 %t145 ; <float*> [#uses=1]
br label %bb194
; Preheader of the %bb137 loop: load the <4 x float> at %arg index %t22 - 1 as
; the initial carried vector, and compute the loop-invariant index offsets and
; the starting value %arg6 - 16 - %t22 for the remaining-count test.
bb122: ; preds = %bb118
%t123 = add i64 %t22, -1 ; <i64> [#uses=1]
%t124 = getelementptr inbounds float* %arg, i64 %t123 ; <float*> [#uses=1]
%t125 = bitcast float* %t124 to <4 x float>* ; <<4 x float>*> [#uses=1]
%t126 = load <4 x float>* %t125 ; <<4 x float>> [#uses=1]
%t127 = add i64 %t22, 16 ; <i64> [#uses=1]
%t128 = add i64 %t22, 3 ; <i64> [#uses=1]
%t129 = add i64 %t22, 7 ; <i64> [#uses=1]
%t130 = add i64 %t22, 11 ; <i64> [#uses=1]
%t131 = add i64 %t22, 15 ; <i64> [#uses=1]
%t132 = add i64 %t22, 4 ; <i64> [#uses=1]
%t133 = add i64 %t22, 8 ; <i64> [#uses=1]
%t134 = add i64 %t22, 12 ; <i64> [#uses=1]
%t135 = add i64 %arg6, -16 ; <i64> [#uses=1]
%t136 = sub i64 %t135, %t22 ; <i64> [#uses=1]
br label %bb137
; Vector loop, 16 floats per iteration, used when %t13 is false. Loads from
; %arg use index offsets 3/7/11/15; each multiplicand is assembled from two
; neighbouring loads by a pair of shufflevectors, with %t143 carrying the last
; load of the previous iteration. Stores to %arg4 use index offsets 0/4/8/12.
; The running vectors are advanced by %t55, and the loop repeats while the
; remaining count %t171 is greater than 15.
bb137: ; preds = %bb137, %bb122
%t138 = phi i64 [ 0, %bb122 ], [ %t193, %bb137 ] ; <i64> [#uses=3]
%t139 = phi <4 x float> [ %t54, %bb122 ], [ %t189, %bb137 ] ; <<4 x float>> [#uses=2]
%t140 = phi <4 x float> [ %t50, %bb122 ], [ %t185, %bb137 ] ; <<4 x float>> [#uses=2]
%t141 = phi <4 x float> [ %t53, %bb122 ], [ %t190, %bb137 ] ; <<4 x float>> [#uses=2]
%t142 = phi <4 x float> [ %t52, %bb122 ], [ %t191, %bb137 ] ; <<4 x float>> [#uses=2]
%t143 = phi <4 x float> [ %t126, %bb122 ], [ %t175, %bb137 ] ; <<4 x float>> [#uses=1]
%t144 = shl i64 %t138, 4 ; <i64> [#uses=9]
%t145 = add i64 %t127, %t144 ; <i64> [#uses=2]
%t146 = add i64 %t128, %t144 ; <i64> [#uses=1]
%t147 = getelementptr float* %arg, i64 %t146 ; <float*> [#uses=1]
%t148 = bitcast float* %t147 to <4 x float>* ; <<4 x float>*> [#uses=1]
%t149 = add i64 %t129, %t144 ; <i64> [#uses=1]
%t150 = getelementptr float* %arg, i64 %t149 ; <float*> [#uses=1]
%t151 = bitcast float* %t150 to <4 x float>* ; <<4 x float>*> [#uses=1]
%t152 = add i64 %t130, %t144 ; <i64> [#uses=1]
%t153 = getelementptr float* %arg, i64 %t152 ; <float*> [#uses=1]
%t154 = bitcast float* %t153 to <4 x float>* ; <<4 x float>*> [#uses=1]
%t155 = add i64 %t131, %t144 ; <i64> [#uses=1]
%t156 = getelementptr float* %arg, i64 %t155 ; <float*> [#uses=1]
%t157 = bitcast float* %t156 to <4 x float>* ; <<4 x float>*> [#uses=1]
%t158 = add i64 %t22, %t144 ; <i64> [#uses=1]
%t159 = getelementptr float* %arg4, i64 %t158 ; <float*> [#uses=1]
%t160 = bitcast float* %t159 to <4 x float>* ; <<4 x float>*> [#uses=1]
%t161 = add i64 %t132, %t144 ; <i64> [#uses=1]
%t162 = getelementptr float* %arg4, i64 %t161 ; <float*> [#uses=1]
%t163 = bitcast float* %t162 to <4 x float>* ; <<4 x float>*> [#uses=1]
%t164 = add i64 %t133, %t144 ; <i64> [#uses=1]
%t165 = getelementptr float* %arg4, i64 %t164 ; <float*> [#uses=1]
%t166 = bitcast float* %t165 to <4 x float>* ; <<4 x float>*> [#uses=1]
%t167 = add i64 %t134, %t144 ; <i64> [#uses=1]
%t168 = getelementptr float* %arg4, i64 %t167 ; <float*> [#uses=1]
%t169 = bitcast float* %t168 to <4 x float>* ; <<4 x float>*> [#uses=1]
%t170 = mul i64 %t138, -16 ; <i64> [#uses=1]
%t171 = add i64 %t136, %t170 ; <i64> [#uses=2]
%t172 = load <4 x float>* %t148 ; <<4 x float>> [#uses=2]
%t173 = load <4 x float>* %t151 ; <<4 x float>> [#uses=2]
%t174 = load <4 x float>* %t154 ; <<4 x float>> [#uses=2]
%t175 = load <4 x float>* %t157 ; <<4 x float>> [#uses=2]
%t176 = shufflevector <4 x float> %t143, <4 x float> %t172, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
%t177 = shufflevector <4 x float> %t176, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
%t178 = shufflevector <4 x float> %t172, <4 x float> %t173, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
%t179 = shufflevector <4 x float> %t178, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
%t180 = shufflevector <4 x float> %t173, <4 x float> %t174, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
%t181 = shufflevector <4 x float> %t180, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
%t182 = shufflevector <4 x float> %t174, <4 x float> %t175, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
%t183 = shufflevector <4 x float> %t182, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
%t184 = fmul <4 x float> %t177, %t140 ; <<4 x float>> [#uses=1]
%t185 = fadd <4 x float> %t140, %t55 ; <<4 x float>> [#uses=2]
%t186 = fmul <4 x float> %t179, %t142 ; <<4 x float>> [#uses=1]
%t187 = fmul <4 x float> %t181, %t141 ; <<4 x float>> [#uses=1]
%t188 = fmul <4 x float> %t183, %t139 ; <<4 x float>> [#uses=1]
store <4 x float> %t184, <4 x float>* %t160
store <4 x float> %t186, <4 x float>* %t163
store <4 x float> %t187, <4 x float>* %t166
store <4 x float> %t188, <4 x float>* %t169
%t189 = fadd <4 x float> %t139, %t55 ; <<4 x float>> [#uses=1]
%t190 = fadd <4 x float> %t141, %t55 ; <<4 x float>> [#uses=1]
%t191 = fadd <4 x float> %t142, %t55 ; <<4 x float>> [#uses=1]
%t192 = icmp sgt i64 %t171, 15 ; <i1> [#uses=1]
%t193 = add i64 %t138, 1 ; <i64> [#uses=1]
br i1 %t192, label %bb137, label %bb119
; Scalar tail setup: %t195 is the number of elements still to process, %t197 and
; %t198 are the destination and source pointers, and %t199 is lane 0 of the
; running vector. The tail loop is skipped when %t195 <= 0.
bb194: ; preds = %bb119, %bb118, %bb112
%t195 = phi i64 [ %t116, %bb112 ], [ %t171, %bb119 ], [ %t24, %bb118 ] ; <i64> [#uses=2]
%t196 = phi <4 x float> [ %t115, %bb112 ], [ %t185, %bb119 ], [ %t50, %bb118 ] ; <<4 x float>> [#uses=1]
%t197 = phi float* [ %t114, %bb112 ], [ %t121, %bb119 ], [ %t25, %bb118 ] ; <float*> [#uses=1]
%t198 = phi float* [ %t113, %bb112 ], [ %t120, %bb119 ], [ %t26, %bb118 ] ; <float*> [#uses=1]
%t199 = extractelement <4 x float> %t196, i32 0 ; <float> [#uses=2]
%t200 = icmp sgt i64 %t195, 0 ; <i1> [#uses=1]
br i1 %t200, label %bb201, label %bb211
; Scalar tail loop: one element per iteration, same multiply/store/advance as
; %bb32, until the local index %t209 reaches %t195.
bb201: ; preds = %bb201, %bb194
%t202 = phi i64 [ %t209, %bb201 ], [ 0, %bb194 ] ; <i64> [#uses=3]
%t203 = phi float [ %t208, %bb201 ], [ %t199, %bb194 ] ; <float> [#uses=2]
%t204 = getelementptr float* %t198, i64 %t202 ; <float*> [#uses=1]
%t205 = getelementptr float* %t197, i64 %t202 ; <float*> [#uses=1]
%t206 = load float* %t204 ; <float> [#uses=1]
%t207 = fmul float %t203, %t206 ; <float> [#uses=1]
store float %t207, float* %t205
%t208 = fadd float %t203, %t8 ; <float> [#uses=2]
%t209 = add i64 %t202, 1 ; <i64> [#uses=2]
%t210 = icmp eq i64 %t209, %t195 ; <i1> [#uses=1]
br i1 %t210, label %bb211, label %bb201
; Store the final running value back to %arg2 and return.
bb211: ; preds = %bb201, %bb194
%t212 = phi float [ %t199, %bb194 ], [ %t208, %bb201 ] ; <float> [#uses=1]
store float %t212, float* %arg2
ret void
; Early exit taken when the entry test on %arg1/%arg5 (%t18) fails.
bb213: ; preds = %bb
ret void
}