diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index f82a9a6dd4f1..5626e69398ee 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -692,10 +692,6 @@ static int CompareSCEVComplexity( if (LNumOps != RNumOps) return (int)LNumOps - (int)RNumOps; - // Compare NoWrap flags. - if (LA->getNoWrapFlags() != RA->getNoWrapFlags()) - return (int)LA->getNoWrapFlags() - (int)RA->getNoWrapFlags(); - // Lexicographically compare. for (unsigned i = 0; i != LNumOps; ++i) { int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, @@ -720,10 +716,6 @@ static int CompareSCEVComplexity( if (LNumOps != RNumOps) return (int)LNumOps - (int)RNumOps; - // Compare NoWrap flags. - if (LC->getNoWrapFlags() != RC->getNoWrapFlags()) - return (int)LC->getNoWrapFlags() - (int)RC->getNoWrapFlags(); - for (unsigned i = 0; i != LNumOps; ++i) { int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getOperand(i), RC->getOperand(i), DT, diff --git a/llvm/test/Transforms/IRCE/conjunctive-checks.ll b/llvm/test/Transforms/IRCE/conjunctive-checks.ll index 4e3cf3541250..60a0af831746 100644 --- a/llvm/test/Transforms/IRCE/conjunctive-checks.ll +++ b/llvm/test/Transforms/IRCE/conjunctive-checks.ll @@ -5,10 +5,10 @@ define void @f_0(i32 *%arr, i32 *%a_len_ptr, i32 %n, i1* %cond_buf) { ; CHECK-LABEL: @f_0( ; CHECK: loop.preheader: -; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len ; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n -; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_safe_range_end]], [[not_n]] -; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_safe_range_end]], i32 [[not_n]] +; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len +; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_safe_range_end]] +; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_n]], i32 [[not_safe_range_end]] ; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at_hiclamp]] ; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0 ; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0 diff --git a/llvm/test/Transforms/IRCE/single-access-no-preloop.ll b/llvm/test/Transforms/IRCE/single-access-no-preloop.ll index acca948a7abe..fb643139c6dc 100644 --- a/llvm/test/Transforms/IRCE/single-access-no-preloop.ll +++ b/llvm/test/Transforms/IRCE/single-access-no-preloop.ll @@ -86,10 +86,10 @@ define void @single_access_no_preloop_with_offset(i32 *%arr, i32 *%a_len_ptr, i3 ; CHECK-LABEL: @single_access_no_preloop_with_offset( ; CHECK: loop.preheader: -; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len ; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n -; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_safe_range_end]], [[not_n]] -; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_safe_range_end]], i32 [[not_n]] +; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len +; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_safe_range_end]] +; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_n]], i32 [[not_safe_range_end]] ; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at_hiclamp]] ; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0 ; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0 diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/compare-scev-by-complexity.ll b/llvm/test/Transforms/LoadStoreVectorizer/X86/compare-scev-by-complexity.ll new file mode 100644 index 000000000000..7f29a73bcf9f --- /dev/null +++ b/llvm/test/Transforms/LoadStoreVectorizer/X86/compare-scev-by-complexity.ll @@ -0,0 +1,76 @@ +; RUN: opt -load-store-vectorizer %s -S | FileCheck %s + +; Check that setting wrapping flags after a SCEV node is created +; does not invalidate "sorted by complexity" invariant for +; operands of commutative and associative SCEV operators. + +target triple = "x86_64--" + +@global_value0 = external constant i32 +@global_value1 = external constant i32 +@other_value = external global float +@a = external global float +@b = external global float +@c = external global float +@d = external global float +@plus1 = external global i32 +@cnd = external global i8 + +; Function Attrs: nounwind +define void @main() local_unnamed_addr #0 { +; CHECK-LABEL: @main() +; CHECK: [[PTR:%[0-9]+]] = bitcast float* %preheader.load0.address to <2 x float>* +; CHECK: = load <2 x float>, <2 x float>* [[PTR]] +; CHECK-LABEL: for.body23: +entry: + %tmp = load i32, i32* @global_value0, !range !0 + %tmp2 = load i32, i32* @global_value1 + %and.i.i = and i32 %tmp2, 2 + %add.nuw.nsw.i.i = add nuw nsw i32 %and.i.i, 0 + %mul.i.i = shl nuw nsw i32 %add.nuw.nsw.i.i, 1 + %and6.i.i = and i32 %tmp2, 3 + %and9.i.i = and i32 %tmp2, 4 + %add.nuw.nsw10.i.i = add nuw nsw i32 %and6.i.i, %and9.i.i + %conv3.i42.i = add nuw nsw i32 %mul.i.i, 1 + %reass.add346.7 = add nuw nsw i32 %add.nuw.nsw10.i.i, 56 + %reass.mul347.7 = mul nuw nsw i32 %tmp, %reass.add346.7 + %add7.i.7 = add nuw nsw i32 %reass.mul347.7, 0 + %preheader.address0.idx = add nuw nsw i32 %add7.i.7, %mul.i.i + %preheader.address0.idx.zext = zext i32 %preheader.address0.idx to i64 + %preheader.load0.address = getelementptr inbounds float, float* @other_value, i64 %preheader.address0.idx.zext + %preheader.load0. = load float, float* %preheader.load0.address, align 4, !tbaa !1 + %common.address.idx = add nuw nsw i32 %add7.i.7, %conv3.i42.i + %preheader.header.common.address.idx.zext = zext i32 %common.address.idx to i64 + %preheader.load1.address = getelementptr inbounds float, float* @other_value, i64 %preheader.header.common.address.idx.zext + %preheader.load1. = load float, float* %preheader.load1.address, align 4, !tbaa !1 + br label %for.body23 + +for.body23: ; preds = %for.body23, %entry + %loop.header.load0.address = getelementptr inbounds float, float* @other_value, i64 %preheader.header.common.address.idx.zext + %loop.header.load0. = load float, float* %loop.header.load0.address, align 4, !tbaa !1 + %reass.mul343.7 = mul nuw nsw i32 %reass.add346.7, 72 + %add7.i286.7.7 = add nuw nsw i32 %reass.mul343.7, 56 + %add9.i288.7.7 = add nuw nsw i32 %add7.i286.7.7, %mul.i.i + %loop.header.address1.idx = add nuw nsw i32 %add9.i288.7.7, 1 + %loop.header.address1.idx.zext = zext i32 %loop.header.address1.idx to i64 + %loop.header.load1.address = getelementptr inbounds float, float* @other_value, i64 %loop.header.address1.idx.zext + %loop.header.load1. = load float, float* %loop.header.load1.address, align 4, !tbaa !1 + store float %preheader.load0., float* @a, align 4, !tbaa !1 + store float %preheader.load1., float* @b, align 4, !tbaa !1 + store float %loop.header.load0., float* @c, align 4, !tbaa !1 + store float %loop.header.load1., float* @d, align 4, !tbaa !1 + %loaded.cnd = load i8, i8* @cnd + %condition = trunc i8 %loaded.cnd to i1 + br i1 %condition, label %for.body23, label %exit + +exit: + ret void +} + +attributes #0 = { nounwind } + +!0 = !{i32 0, i32 65536} +!1 = !{!2, !2, i64 0} +!2 = !{!"float", !3, i64 0} +!3 = !{!"omnipotent char", !4, i64 0} +!4 = !{!"Simple C++ TBAA"} diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll index b8760cb8d509..1f2a20615868 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll @@ -40,8 +40,8 @@ define i32 @main() local_unnamed_addr #0 { ; CHECK-NEXT: [[TMP4:%.*]] = add i8 [[CONV3]], -1 ; CHECK-NEXT: [[TMP5:%.*]] = zext i8 [[TMP4]] to i32 ; CHECK-NEXT: [[TMP6:%.*]] = sub i32 -1, [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP6]], [[TMP3]] -; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP3]], [[TMP6]] +; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP7]], i32 [[TMP3]], i32 [[TMP6]] ; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[UMAX]], 2 ; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], [[TMP5]] ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP9]], 8 @@ -50,8 +50,8 @@ define i32 @main() local_unnamed_addr #0 { ; CHECK-NEXT: [[TMP10:%.*]] = add i8 [[CONV3]], -1 ; CHECK-NEXT: [[TMP11:%.*]] = zext i8 [[TMP10]] to i32 ; CHECK-NEXT: [[TMP12:%.*]] = sub i32 -1, [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i32 [[TMP12]], [[TMP3]] -; CHECK-NEXT: [[UMAX1:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 [[TMP3]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i32 [[TMP3]], [[TMP12]] +; CHECK-NEXT: [[UMAX1:%.*]] = select i1 [[TMP13]], i32 [[TMP3]], i32 [[TMP12]] ; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[UMAX1]], 1 ; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], [[TMP11]] ; CHECK-NEXT: [[TMP16:%.*]] = trunc i32 [[TMP15]] to i8