[LoopIdiom] Basic OptimizationRemarkEmitter handling

Summary:
I'm adding ORE to memset/memcpy formation, with tests,
but mainly this is split off from D61144.

Reviewers: reames, anemet, thegameg, craig.topper

Reviewed By: thegameg

Subscribers: llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62631

llvm-svn: 362092
This commit is contained in:
Roman Lebedev 2019-05-30 13:02:06 +00:00
parent fae2e46766
commit e8578953ac
4 changed files with 147 additions and 5 deletions

View File

@ -51,6 +51,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
@ -119,6 +120,7 @@ class LoopIdiomRecognize {
TargetLibraryInfo *TLI;
const TargetTransformInfo *TTI;
const DataLayout *DL;
OptimizationRemarkEmitter &ORE;
bool ApplyCodeSizeHeuristics;
public:
@ -126,8 +128,9 @@ public:
LoopInfo *LI, ScalarEvolution *SE,
TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI,
const DataLayout *DL)
: AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL) {}
const DataLayout *DL,
OptimizationRemarkEmitter &ORE)
: AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {}
bool runOnLoop(Loop *L);
@ -220,7 +223,12 @@ public:
*L->getHeader()->getParent());
const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout();
LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL);
// For the old PM, we can't use OptimizationRemarkEmitter as an analysis
// pass. Function analyses need to be preserved across loop transformations
// but ORE cannot be preserved (see comment before the pass definition).
OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL, ORE);
return LIR.runOnLoop(L);
}
@ -242,7 +250,19 @@ PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
LPMUpdater &) {
const auto *DL = &L.getHeader()->getModule()->getDataLayout();
LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL);
const auto &FAM =
AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
Function *F = L.getHeader()->getParent();
auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
// FIXME: This should probably be optional rather than required.
if (!ORE)
report_fatal_error(
"LoopIdiomRecognizePass: OptimizationRemarkEmitterAnalysis not cached "
"at a higher level");
LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL,
*ORE);
if (!LIR.runOnLoop(&L))
return PreservedAnalyses::all();
@ -951,6 +971,14 @@ bool LoopIdiomRecognize::processLoopStridedStore(
<< "\n");
NewCall->setDebugLoc(TheStore->getDebugLoc());
ORE.emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStridedStore",
NewCall->getDebugLoc(), Preheader)
<< "Transformed loop-strided store into a call to "
<< ore::NV("NewFunction", NewCall->getCalledFunction())
<< "() function";
});
// Okay, the memset has been formed. Zap the original store and anything that
// feeds into it.
for (auto *I : Stores)
@ -1083,6 +1111,14 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
<< " from store ptr=" << *StoreEv << " at: " << *SI
<< "\n");
ORE.emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "ProcessLoopStoreOfLoopLoad",
NewCall->getDebugLoc(), Preheader)
<< "Formed a call to "
<< ore::NV("NewFunction", NewCall->getCalledFunction())
<< "() function";
});
// Okay, the memcpy has been formed. Zap the original store and anything that
// feeds into it.
deleteDeadInstruction(SI);

View File

@ -0,0 +1,51 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -basicaa -debugify -loop-idiom -pass-remarks=loop-idiom -pass-remarks-analysis=loop-idiom -verify -verify-each -verify-dom-info -verify-loop-info < %s -S 2>&1 | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Check that everything still works when debuginfo is present, and that it is reasonably propagated.
; CHECK: remark: <stdin>:6:1: Formed a call to llvm.memcpy.p0i8.p0i8.i64() function
define void @test6_dest_align(i32* noalias align 1 %Base, i32* noalias align 4 %Dest, i64 %Size) nounwind ssp {
; CHECK-LABEL: @test6_dest_align(
; CHECK-NEXT: bb.nph:
; CHECK-NEXT: [[DEST1:%.*]] = bitcast i32* [[DEST:%.*]] to i8*
; CHECK-NEXT: [[BASE2:%.*]] = bitcast i32* [[BASE:%.*]] to i8*
; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[SIZE:%.*]], 2, !dbg !18
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[DEST1]], i8* align 1 [[BASE2]], i64 [[TMP0]], i1 false), !dbg !19
; CHECK-NEXT: br label [[FOR_BODY:%.*]], !dbg !18
; CHECK: for.body:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ], !dbg !20
; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[INDVAR]], metadata !9, metadata !DIExpression()), !dbg !20
; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i32, i32* [[BASE]], i64 [[INDVAR]], !dbg !21
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32* [[I_0_014]], metadata !11, metadata !DIExpression()), !dbg !21
; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i32, i32* [[DEST]], i64 [[INDVAR]], !dbg !22
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32* [[DESTI]], metadata !12, metadata !DIExpression()), !dbg !22
; CHECK-NEXT: [[V:%.*]] = load i32, i32* [[I_0_014]], align 1, !dbg !23
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[V]], metadata !13, metadata !DIExpression()), !dbg !23
; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1, !dbg !24
; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[INDVAR_NEXT]], metadata !15, metadata !DIExpression()), !dbg !24
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]], !dbg !25
; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[EXITCOND]], metadata !16, metadata !DIExpression()), !dbg !25
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !dbg !26
; CHECK: for.end:
; CHECK-NEXT: ret void, !dbg !27
;
bb.nph:
br label %for.body
for.body: ; preds = %bb.nph, %for.body
%indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
%I.0.014 = getelementptr i32, i32* %Base, i64 %indvar
%DestI = getelementptr i32, i32* %Dest, i64 %indvar
%V = load i32, i32* %I.0.014, align 1
store i32 %V, i32* %DestI, align 4
%indvar.next = add i64 %indvar, 1
%exitcond = icmp eq i64 %indvar.next, %Size
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}

View File

@ -0,0 +1,55 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -basicaa -debugify -loop-idiom -pass-remarks=loop-idiom -pass-remarks-analysis=loop-idiom -verify -verify-each -verify-dom-info -verify-loop-info < %s -S 2>&1 | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Check that everything still works when debuginfo is present, and that it is reasonably propagated.
; void my_basic_memset(char* begin, char* end, char value) {
; for( ; begin != end; ++begin)
; *begin = value;
; }
; CHECK: remark: <stdin>:4:1: Transformed loop-strided store into a call to llvm.memset.p0i8.i64() function
define void @_Z15my_basic_memsetPcS_c(i8* %ptr, i8* %end, i8 %value) {
; CHECK-LABEL: @_Z15my_basic_memsetPcS_c(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[PTR1:%.*]] = ptrtoint i8* [[PTR:%.*]] to i64
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8* [[PTR]], [[END:%.*]], !dbg !15
; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[CMP3]], metadata !9, metadata !DIExpression()), !dbg !15
; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]], !dbg !16
; CHECK: for.body.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 0, [[PTR1]], !dbg !17
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[END]], i64 [[TMP0]], !dbg !17
; CHECK-NEXT: [[SCEVGEP2:%.*]] = ptrtoint i8* [[SCEVGEP]] to i64
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[PTR]], i8 [[VALUE:%.*]], i64 [[SCEVGEP2]], i1 false), !dbg !17
; CHECK-NEXT: br label [[FOR_BODY:%.*]], !dbg !17
; CHECK: for.body:
; CHECK-NEXT: [[PTR_ADDR_04:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[PTR]], [[FOR_BODY_PREHEADER]] ], !dbg !18
; CHECK-NEXT: call void @llvm.dbg.value(metadata i8* [[PTR_ADDR_04]], metadata !11, metadata !DIExpression()), !dbg !18
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[PTR_ADDR_04]], i64 1, !dbg !19
; CHECK-NEXT: call void @llvm.dbg.value(metadata i8* [[INCDEC_PTR]], metadata !13, metadata !DIExpression()), !dbg !19
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[INCDEC_PTR]], [[END]], !dbg !20
; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[CMP]], metadata !14, metadata !DIExpression()), !dbg !20
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]], !dbg !21
; CHECK: for.end.loopexit:
; CHECK-NEXT: br label [[FOR_END]], !dbg !22
; CHECK: for.end:
; CHECK-NEXT: ret void, !dbg !22
;
entry:
%cmp3 = icmp eq i8* %ptr, %end
br i1 %cmp3, label %for.end, label %for.body
for.body: ; preds = %entry, %for.body
%ptr.addr.04 = phi i8* [ %incdec.ptr, %for.body ], [ %ptr, %entry ]
store i8 %value, i8* %ptr.addr.04, align 1
%incdec.ptr = getelementptr inbounds i8, i8* %ptr.addr.04, i64 1
%cmp = icmp eq i8* %incdec.ptr, %end
br i1 %cmp, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
ret void
}

View File

@ -1,5 +1,5 @@
; RUN: opt -loop-idiom < %s -S | FileCheck %s
; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(loop-idiom)' < %s -S | FileCheck %s
; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(loop-idiom)' < %s -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"