diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 36570b49bb0b..763ce0838377 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6326,9 +6326,16 @@ LoopVectorizationCostModel::getSmallestAndWidestTypes() {
         T = ST->getValueOperand()->getType();
 
       // Ignore loaded pointer types and stored pointer types that are not
-      // consecutive. However, we do want to take consecutive stores/loads of
-      // pointer vectors into account.
-      if (T->isPointerTy() && !isConsecutiveLoadOrStore(&I))
+      // vectorizable.
+      //
+      // FIXME: The check here attempts to predict whether a load or store will
+      //        be vectorized. We only know this for certain after a VF has
+      //        been selected. Here, we assume that if an access can be
+      //        vectorized, it will be. We should also look at extending this
+      //        optimization to non-pointer types.
+      //
+      if (T->isPointerTy() && !isConsecutiveLoadOrStore(&I) &&
+          !Legal->isAccessInterleaved(&I) && !Legal->isLegalGatherOrScatter(&I))
         continue;
 
       MinWidth = std::min(MinWidth,
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll b/llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll
new file mode 100644
index 000000000000..1ae7dadeffd7
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll
@@ -0,0 +1,33 @@
+; REQUIRES: asserts
+; RUN: opt < %s -loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; CHECK-LABEL: Checking a loop in "interleaved_access"
+; CHECK: The Smallest and Widest types: 64 / 64 bits
+;
+define void @interleaved_access(i8** %A, i64 %N) {
+for.ph:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ %i.next.3, %for.body ], [ 0, %for.ph ]
+  %tmp0 = getelementptr inbounds i8*, i8** %A, i64 %i
+  store i8* null, i8** %tmp0, align 8
+  %i.next.0 = add nuw nsw i64 %i, 1
+  %tmp1 = getelementptr inbounds i8*, i8** %A, i64 %i.next.0
+  store i8* null, i8** %tmp1, align 8
+  %i.next.1 = add nsw i64 %i, 2
+  %tmp2 = getelementptr inbounds i8*, i8** %A, i64 %i.next.1
+  store i8* null, i8** %tmp2, align 8
+  %i.next.2 = add nsw i64 %i, 3
+  %tmp3 = getelementptr inbounds i8*, i8** %A, i64 %i.next.2
+  store i8* null, i8** %tmp3, align 8
+  %i.next.3 = add nsw i64 %i, 4
+  %cond = icmp slt i64 %i.next.3, %N
+  br i1 %cond, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
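
For readers reconstructing the intent of the test, a rough C++ analogue of the IR loop might look like the sketch below (this source is an assumption; only the IR appears in the patch). Each of the four stores, taken alone, has stride 4 and so fails isConsecutiveLoadOrStore, which is why the old check ignored the pointer type. Together, however, the stores cover A[i..i+3] and form an interleave group with factor 4, so the new Legal->isAccessInterleaved(&I) clause keeps the 64-bit pointer type in the smallest/widest-type computation, matching the expected "64 / 64 bits" debug output.

    // Hypothetical source for the test loop (an assumption, not from the patch).
    void interleaved_access(char **A, long N) {
      // The IR loop is bottom-tested, so this analogue assumes N > 0 and
      // N a multiple of 4.
      for (long i = 0; i < N; i += 4) {
        A[i] = nullptr;     // stride-4 store: not consecutive by itself
        A[i + 1] = nullptr; // ...but the four stores together form an
        A[i + 2] = nullptr; // interleave group covering A[i..i+3]
        A[i + 3] = nullptr;
      }
    }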