From d96e427eacdd4d48da3a1f93a64008ef4dadcc8a Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Sun, 5 May 2013 01:54:48 +0000 Subject: [PATCH] LoopVectorize: Add support for floating point min/max reductions Add support for min/max reductions when "no-nans-fp-math" is enabled. This allows us to assume we have ordered floating point math and treat ordered and unordered predicates equally. radar://13723044 llvm-svn: 181144 --- .../Transforms/Vectorize/LoopVectorize.cpp | 91 +++- .../LoopVectorize/minmax_reduction.ll | 480 ++++++++++++++++++ 2 files changed, 549 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 9f9058755144..c7ff7c3fceb1 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -335,7 +335,7 @@ public: DominatorTree *DT, TargetTransformInfo* TTI, AliasAnalysis *AA, TargetLibraryInfo *TLI) : TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), TLI(TLI), - Induction(0) {} + Induction(0), HasFunNoNaNAttr(false) {} /// This enum represents the kinds of reductions that we support. enum ReductionKind { @@ -347,7 +347,8 @@ public: RK_IntegerXor, ///< Bitwise or logical XOR of numbers. RK_IntegerMinMax, ///< Min/max implemented in terms of select(cmp()). RK_FloatAdd, ///< Sum of floats. - RK_FloatMult ///< Product of floats. + RK_FloatMult, ///< Product of floats. + RK_FloatMinMax ///< Min/max implemented in terms of select(cmp()). }; /// This enum represents the kinds of inductions that we support. @@ -365,7 +366,9 @@ public: MRK_UIntMin, MRK_UIntMax, MRK_SIntMin, - MRK_SIntMax + MRK_SIntMax, + MRK_FloatMin, + MRK_FloatMax }; /// This POD struct holds information about reduction variables. @@ -586,6 +589,8 @@ private: /// We need to check that all of the pointers in this list are disjoint /// at runtime. RuntimePointerCheck PtrRtCheck; + /// Can we assume the absence of NaNs. 
+ bool HasFunNoNaNAttr; }; /// LoopVectorizationCostModel - estimates the expected speedups due to @@ -1648,6 +1653,8 @@ getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) { return Instruction::FAdd; case LoopVectorizationLegality::RK_IntegerMinMax: return Instruction::ICmp; + case LoopVectorizationLegality::RK_FloatMinMax: + return Instruction::FCmp; default: llvm_unreachable("Unknown reduction operation"); } @@ -1672,8 +1679,21 @@ Value *createMinMaxOp(IRBuilder<> &Builder, break; case LoopVectorizationLegality::MRK_SIntMax: P = CmpInst::ICMP_SGT; + break; + case LoopVectorizationLegality::MRK_FloatMin: + P = CmpInst::FCMP_OLT; + break; + case LoopVectorizationLegality::MRK_FloatMax: + P = CmpInst::FCMP_OGT; + break; } - Value *Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp"); + + Value *Cmp; + if (RK == LoopVectorizationLegality::MRK_FloatMin || RK == LoopVectorizationLegality::MRK_FloatMax) + Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp"); + else + Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp"); + Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select"); return Select; } @@ -1743,11 +1763,12 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // one for multiplication, -1 for And. Value *Identity; Value *VectorStart; - if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax) + if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax || + RdxDesc.Kind == LoopVectorizationLegality::RK_FloatMinMax) { // MinMax reduction have the start value as their identify. 
VectorStart = Identity = Builder.CreateVectorSplat(VF, RdxDesc.StartValue, "minmax.ident"); - else { + } else { Constant *Iden = LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind, VecTy->getScalarType()); @@ -1801,7 +1822,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { Value *ReducedPartRdx = RdxParts[0]; unsigned Op = getReductionBinOp(RdxDesc.Kind); for (unsigned part = 1; part < UF; ++part) { - if (Op != Instruction::ICmp) + if (Op != Instruction::ICmp && Op != Instruction::FCmp) ReducedPartRdx = Builder.CreateBinOp((Instruction::BinaryOps)Op, RdxParts[part], ReducedPartRdx, "bin.rdx"); @@ -1832,7 +1853,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { ConstantVector::get(ShuffleMask), "rdx.shuf"); - if (Op != Instruction::ICmp) + if (Op != Instruction::ICmp && Op != Instruction::FCmp) TmpVec = Builder.CreateBinOp((Instruction::BinaryOps)Op, TmpVec, Shuf, "bin.rdx"); else @@ -2363,6 +2384,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { return false; } + // Look for the attribute signaling the absence of NaNs. + Function &F = *Header->getParent(); + if (F.hasFnAttribute("no-nans-fp-math")) + HasFunNoNaNAttr = F.getAttributes().getAttribute( + AttributeSet::FunctionIndex, + "no-nans-fp-math").getValueAsString() == "true"; + // For each block in the loop. 
for (Loop::block_iterator bb = TheLoop->block_begin(), be = TheLoop->block_end(); bb != be; ++bb) { @@ -2444,6 +2472,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { DEBUG(dbgs() << "LV: Found an FAdd reduction PHI."<< *Phi <<"\n"); continue; } + if (AddReductionVar(Phi, RK_FloatMinMax)) { + DEBUG(dbgs() << "LV: Found an float MINMAX reduction PHI."<< *Phi <<"\n"); + continue; + } DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n"); return false; @@ -2869,7 +2901,7 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, // such that we don't stop when we see the phi has two uses (one by the select // and one by the icmp) and to make sure we only see exactly the two // instructions. - unsigned NumICmpSelectPatternInst = 0; + unsigned NumCmpSelectPatternInst = 0; ReductionInstDesc ReduxDesc(false, 0); // Avoid cycles in the chain. @@ -2918,7 +2950,7 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, // We can't have multiple inside users except for a combination of // icmp/select both using the phi. - if (FoundInBlockUser && !NumICmpSelectPatternInst) + if (FoundInBlockUser && !NumCmpSelectPatternInst) return false; FoundInBlockUser = true; @@ -2927,14 +2959,15 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, if (!ReduxDesc.IsReduction) return false; - if (Kind == RK_IntegerMinMax && (isa(U) || - isa(U))) - ++NumICmpSelectPatternInst; + if (Kind == RK_IntegerMinMax && (isa(U) || isa(U))) + ++NumCmpSelectPatternInst; + if (Kind == RK_FloatMinMax && (isa(U) || isa(U))) + ++NumCmpSelectPatternInst; // Reductions of instructions such as Div, and Sub is only // possible if the LHS is the reduction variable. 
if (!U->isCommutative() && !isa(U) && !isa(U) && - !isa(U) && U->getOperand(0) != Iter) + !isa(U) && !isa(U) && U->getOperand(0) != Iter) return false; Iter = ReduxDesc.PatternLastInst; @@ -2942,7 +2975,8 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, // This means we have seen one but not the other instruction of the // pattern or more than just a select and cmp. - if (Kind == RK_IntegerMinMax && NumICmpSelectPatternInst != 2) + if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) && + NumCmpSelectPatternInst != 2) return false; // We found a reduction var if we have reached the original @@ -2968,16 +3002,17 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, /// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction /// pattern corresponding to a min(X, Y) or max(X, Y). LoopVectorizationLegality::ReductionInstDesc -LoopVectorizationLegality::isMinMaxSelectCmpPattern(Instruction *I, ReductionInstDesc &Prev) { +LoopVectorizationLegality::isMinMaxSelectCmpPattern(Instruction *I, + ReductionInstDesc &Prev) { - assert((isa(I) || isa(I)) && + assert((isa(I) || isa(I) || isa(I)) && "Expect a select instruction"); - ICmpInst *Cmp = 0; + Instruction *Cmp = 0; SelectInst *Select = 0; // We must handle the select(cmp()) as a single instruction. Advance to the // select. - if ((Cmp = dyn_cast(I))) { + if ((Cmp = dyn_cast(I)) || (Cmp = dyn_cast(I))) { if (!Cmp->hasOneUse() || !(Select = dyn_cast(*I->use_begin()))) return ReductionInstDesc(false, I); return ReductionInstDesc(Select, Prev.MinMaxKind); @@ -2986,7 +3021,8 @@ LoopVectorizationLegality::isMinMaxSelectCmpPattern(Instruction *I, ReductionIns // Only handle single use cases for now. 
if (!(Select = dyn_cast(I))) return ReductionInstDesc(false, I); - if (!(Cmp = dyn_cast(I->getOperand(0)))) + if (!(Cmp = dyn_cast(I->getOperand(0))) && + !(Cmp = dyn_cast(I->getOperand(0)))) return ReductionInstDesc(false, I); if (!Cmp->hasOneUse()) return ReductionInstDesc(false, I); @@ -3003,6 +3039,14 @@ LoopVectorizationLegality::isMinMaxSelectCmpPattern(Instruction *I, ReductionIns return ReductionInstDesc(Select, MRK_SIntMax); else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) return ReductionInstDesc(Select, MRK_SIntMin); + else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, MRK_FloatMin); + else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, MRK_FloatMax); + else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, MRK_FloatMin); + else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select)) + return ReductionInstDesc(Select, MRK_FloatMax); return ReductionInstDesc(false, I); } @@ -3017,7 +3061,8 @@ LoopVectorizationLegality::isReductionInstr(Instruction *I, default: return ReductionInstDesc(false, I); case Instruction::PHI: - if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd)) + if (FP && (Kind != RK_FloatMult && Kind != RK_FloatAdd && + Kind != RK_FloatMinMax)) return ReductionInstDesc(false, I); return ReductionInstDesc(I, Prev.MinMaxKind); case Instruction::Sub: @@ -3035,9 +3080,11 @@ LoopVectorizationLegality::isReductionInstr(Instruction *I, return ReductionInstDesc(Kind == RK_FloatMult && FastMath, I); case Instruction::FAdd: return ReductionInstDesc(Kind == RK_FloatAdd && FastMath, I); + case Instruction::FCmp: case Instruction::ICmp: case Instruction::Select: - if (Kind != RK_IntegerMinMax) + if (Kind != RK_IntegerMinMax && + (!HasFunNoNaNAttr || Kind != RK_FloatMinMax)) return ReductionInstDesc(false, I); return isMinMaxSelectCmpPattern(I, Prev); } diff 
--git a/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll b/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll index 36a8758e2cc4..502fd8b9383b 100644 --- a/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll @@ -3,6 +3,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @A = common global [1024 x i32] zeroinitializer, align 16 +@fA = common global [1024 x float] zeroinitializer, align 16 +@dA = common global [1024 x double] zeroinitializer, align 16 ; Signed tests. @@ -403,3 +405,481 @@ for.body: for.end: ret i32 %max.red.0 } + +; Float tests. + +; Maximum. + +; Turn this into a max reduction in the presence of a no-nans-fp-math attribute. +; CHECK: @max_red_float +; CHECK: fcmp ogt <2 x float> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: fcmp ogt <2 x float> +; CHECK: select <2 x i1> + +define float @max_red_float(float %max) #0 { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %cmp3 = fcmp ogt float %0, %max.red.08 + %max.red.0 = select i1 %cmp3, float %0, float %max.red.08 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret float %max.red.0 +} + +; CHECK: @max_red_float_ge +; CHECK: fcmp oge <2 x float> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: fcmp ogt <2 x float> +; CHECK: select <2 x i1> + +define float @max_red_float_ge(float %max) #0 { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %max.red.08 = phi float [ 
%max, %entry ], [ %max.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %cmp3 = fcmp oge float %0, %max.red.08 + %max.red.0 = select i1 %cmp3, float %0, float %max.red.08 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret float %max.red.0 +} + +; CHECK: @inverted_max_red_float +; CHECK: fcmp olt <2 x float> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: fcmp ogt <2 x float> +; CHECK: select <2 x i1> + +define float @inverted_max_red_float(float %max) #0 { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %cmp3 = fcmp olt float %0, %max.red.08 + %max.red.0 = select i1 %cmp3, float %max.red.08, float %0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret float %max.red.0 +} + +; CHECK: @inverted_max_red_float_le +; CHECK: fcmp ole <2 x float> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: fcmp ogt <2 x float> +; CHECK: select <2 x i1> + +define float @inverted_max_red_float_le(float %max) #0 { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %cmp3 = fcmp ole float %0, %max.red.08 + %max.red.0 = select i1 %cmp3, float %max.red.08, float %0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 
%exitcond, label %for.end, label %for.body + +for.end: + ret float %max.red.0 +} + +; CHECK: @unordered_max_red +; CHECK: fcmp ugt <2 x float> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: fcmp ogt <2 x float> +; CHECK: select <2 x i1> + +define float @unordered_max_red_float(float %max) #0 { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %cmp3 = fcmp ugt float %0, %max.red.08 + %max.red.0 = select i1 %cmp3, float %0, float %max.red.08 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret float %max.red.0 +} + +; CHECK: @unordered_max_red_float_ge +; CHECK: fcmp uge <2 x float> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: fcmp ogt <2 x float> +; CHECK: select <2 x i1> + +define float @unordered_max_red_float_ge(float %max) #0 { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %cmp3 = fcmp uge float %0, %max.red.08 + %max.red.0 = select i1 %cmp3, float %0, float %max.red.08 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret float %max.red.0 +} + +; CHECK: @inverted_unordered_max_red +; CHECK: fcmp ult <2 x float> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: fcmp ogt <2 x float> +; CHECK: select <2 x i1> + +define float @inverted_unordered_max_red_float(float %max) #0 { +entry: + br label %for.body + 
+for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %cmp3 = fcmp ult float %0, %max.red.08 + %max.red.0 = select i1 %cmp3, float %max.red.08, float %0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret float %max.red.0 +} + +; CHECK: @inverted_unordered_max_red_float_le +; CHECK: fcmp ule <2 x float> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: fcmp ogt <2 x float> +; CHECK: select <2 x i1> + +define float @inverted_unordered_max_red_float_le(float %max) #0 { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %cmp3 = fcmp ule float %0, %max.red.08 + %max.red.0 = select i1 %cmp3, float %max.red.08, float %0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret float %max.red.0 +} + +; Minimum. + +; Turn this into a min reduction in the presence of a no-nans-fp-math attribute. 
+; CHECK: @min_red_float +; CHECK: fcmp olt <2 x float> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: fcmp olt <2 x float> +; CHECK: select <2 x i1> + +define float @min_red_float(float %min) #0 { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %cmp3 = fcmp olt float %0, %min.red.08 + %min.red.0 = select i1 %cmp3, float %0, float %min.red.08 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret float %min.red.0 +} + +; CHECK: @min_red_float_le +; CHECK: fcmp ole <2 x float> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: fcmp olt <2 x float> +; CHECK: select <2 x i1> + +define float @min_red_float_le(float %min) #0 { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %cmp3 = fcmp ole float %0, %min.red.08 + %min.red.0 = select i1 %cmp3, float %0, float %min.red.08 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret float %min.red.0 +} + +; CHECK: @inverted_min_red_float +; CHECK: fcmp ogt <2 x float> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: fcmp olt <2 x float> +; CHECK: select <2 x i1> + +define float @inverted_min_red_float(float %min) #0 { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, 
%for.body ] + %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %cmp3 = fcmp ogt float %0, %min.red.08 + %min.red.0 = select i1 %cmp3, float %min.red.08, float %0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret float %min.red.0 +} + +; CHECK: @inverted_min_red_float_ge +; CHECK: fcmp oge <2 x float> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: fcmp olt <2 x float> +; CHECK: select <2 x i1> + +define float @inverted_min_red_float_ge(float %min) #0 { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %cmp3 = fcmp oge float %0, %min.red.08 + %min.red.0 = select i1 %cmp3, float %min.red.08, float %0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret float %min.red.0 +} + +; CHECK: @unordered_min_red +; CHECK: fcmp ult <2 x float> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: fcmp olt <2 x float> +; CHECK: select <2 x i1> + +define float @unordered_min_red_float(float %min) #0 { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %cmp3 = fcmp ult float %0, %min.red.08 + %min.red.0 = select i1 %cmp3, float %0, float %min.red.08 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.end, label 
%for.body + +for.end: + ret float %min.red.0 +} + +; CHECK: @unordered_min_red_float_le +; CHECK: fcmp ule <2 x float> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: fcmp olt <2 x float> +; CHECK: select <2 x i1> + +define float @unordered_min_red_float_le(float %min) #0 { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %cmp3 = fcmp ule float %0, %min.red.08 + %min.red.0 = select i1 %cmp3, float %0, float %min.red.08 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret float %min.red.0 +} + +; CHECK: @inverted_unordered_min_red +; CHECK: fcmp ugt <2 x float> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: fcmp olt <2 x float> +; CHECK: select <2 x i1> + +define float @inverted_unordered_min_red_float(float %min) #0 { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %cmp3 = fcmp ugt float %0, %min.red.08 + %min.red.0 = select i1 %cmp3, float %min.red.08, float %0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret float %min.red.0 +} + +; CHECK: @inverted_unordered_min_red_float_ge +; CHECK: fcmp uge <2 x float> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: fcmp olt <2 x float> +; CHECK: select <2 x i1> + +define float @inverted_unordered_min_red_float_ge(float %min) #0 { +entry: + br label %for.body + +for.body: + 
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %cmp3 = fcmp uge float %0, %min.red.08 + %min.red.0 = select i1 %cmp3, float %min.red.08, float %0 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret float %min.red.0 +} + +; Make sure we handle doubles, too. +; CHECK: @min_red_double +; CHECK: fcmp olt <2 x double> +; CHECK: select <2 x i1> +; CHECK: middle.block +; CHECK: fcmp olt <2 x double> +; CHECK: select <2 x i1> + +define double @min_red_double(double %min) #0 { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %min.red.08 = phi double [ %min, %entry ], [ %min.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x double]* @dA, i64 0, i64 %indvars.iv + %0 = load double* %arrayidx, align 4 + %cmp3 = fcmp olt double %0, %min.red.08 + %min.red.0 = select i1 %cmp3, double %0, double %min.red.08 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret double %min.red.0 +} + + +; Don't turn this into a max reduction. 
The no-nans-fp-math attribute is missing +; CHECK: @max_red_float_nans +; CHECK-NOT: <2 x float> + +define float @max_red_float_nans(float %max) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ] + %arrayidx = getelementptr inbounds [1024 x float]* @fA, i64 0, i64 %indvars.iv + %0 = load float* %arrayidx, align 4 + %cmp3 = fcmp ogt float %0, %max.red.08 + %max.red.0 = select i1 %cmp3, float %0, float %max.red.08 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1024 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret float %max.red.0 +} + + +attributes #0 = { "no-nans-fp-math"="true" }