[ZoneAlgo] Handle non-StoreInst/LoadInst MemoryAccesses including memset.

Up to now ZoneAlgo considered array elements accessed by something other
than a LoadInst or StoreInst as not analyzable. This patch removes that
restriction by using the unknown ValInst to describe the written
content or, in the case of a zero memset, the element type's null value.

Differential Revision: https://reviews.llvm.org/D37362

llvm-svn: 312630
This commit is contained in:
Michael Kruse 2017-09-06 12:40:55 +00:00
parent 05710a8b4f
commit bd84ce8931
6 changed files with 261 additions and 24 deletions

View File

@ -137,6 +137,13 @@ private:
void addArrayReadAccess(MemoryAccess *MA);
/// Return the ValInst written by a (must-)write access. Returns a null map
/// if there is no single ValInst[] the array element written to will have;
/// the caller then substitutes the 'unknown' ValInst.
///
/// @return { ValInst[] }
isl::map getWrittenValue(MemoryAccess *MA, isl::map AccRel);
void addArrayWriteAccess(MemoryAccess *MA);
protected:

View File

@ -363,17 +363,6 @@ void ZoneAlgorithm::collectIncompatibleElts(ScopStmt *Stmt,
continue;
}
if (!isa<StoreInst>(MA->getAccessInstruction())) {
DEBUG(dbgs() << "WRITE that is not a StoreInst not supported\n");
OptimizationRemarkMissed R(PassName, "UnusualStore",
MA->getAccessInstruction());
R << "encountered write that is not a StoreInst: "
<< printInstruction(MA->getAccessInstruction());
S->getFunction().getContext().diagnose(R);
IncompatibleElts = IncompatibleElts.add_set(ArrayElts);
}
// In region statements the order is less clear, eg. the load and store
// might be in a boxed loop.
if (Stmt->isRegionStmt() &&
@ -432,6 +421,37 @@ void ZoneAlgorithm::addArrayReadAccess(MemoryAccess *MA) {
}
}
/// Determine the value that a write access stores into the array elements it
/// touches.
///
/// @param MA     The write access to inspect.
/// @param AccRel Access relation of @p MA; only queried for whether it is
///               single-valued.
///
/// @return The ValInst built by makeValInst() for the stored value, or a null
///         map if @p MA is not a must-write or no single written value can be
///         identified.
isl::map ZoneAlgorithm::getWrittenValue(MemoryAccess *MA, isl::map AccRel) {
  // Only must-writes are described; everything else yields a null map.
  if (!MA->isMustWrite())
    return {};

  Value *StoredVal = MA->getAccessValue();
  ScopStmt *Stmt = MA->getStatement();
  Instruction *Inst = MA->getAccessInstruction();

  // Loop scope handed to makeValInst(): the loop around the access
  // instruction for accesses that were originally array accesses, otherwise
  // the loop surrounding the whole statement.
  auto Scope = MA->isOriginalArrayKind() ? LI->getLoopFor(Inst->getParent())
                                         : Stmt->getSurroundingLoop();

  // Case 1: a value of exactly the array's element type is written to a
  // single element.
  if (StoredVal) {
    Type *ElemTy = MA->getLatestScopArrayInfo()->getElementType();
    if (StoredVal->getType() == ElemTy && AccRel.is_single_valued())
      return makeValInst(StoredVal, Stmt, Scope);
  }

  // Case 2: memset(_, 0, _) is equivalent to writing the element type's null
  // value to all touched elements. isMustWrite() (checked above) ensures that
  // all of an element's bytes are overwritten.
  auto *Memset = dyn_cast<MemSetInst>(Inst);
  if (!Memset)
    return {};

  auto *Fill = dyn_cast<Constant>(Memset->getValue());
  Type *ElemTy = MA->getLatestScopArrayInfo()->getElementType();
  if (!Fill || !Fill->isZeroValue())
    return {};

  return makeValInst(Constant::getNullValue(ElemTy), Stmt, Scope);
}
void ZoneAlgorithm::addArrayWriteAccess(MemoryAccess *MA) {
assert(MA->isLatestArrayKind());
assert(MA->isWrite());
@ -449,10 +469,9 @@ void ZoneAlgorithm::addArrayWriteAccess(MemoryAccess *MA) {
give(isl_union_map_add_map(AllMayWrites.take(), AccRel.copy()));
// { Domain[] -> ValInst[] }
auto WriteValInstance =
makeValInst(MA->getAccessValue(), Stmt,
LI->getLoopFor(MA->getAccessInstruction()->getParent()),
MA->isMustWrite());
auto WriteValInstance = getWrittenValue(MA, AccRel);
if (!WriteValInstance)
WriteValInstance = makeUnknownForDomain(Stmt);
// { Domain[] -> [Element[] -> Domain[]] }
auto IncludeElement = give(isl_map_curry(isl_map_domain_map(AccRel.copy())));
@ -698,8 +717,6 @@ void ZoneAlgorithm::computeCommon() {
for (auto *MA : Stmt) {
if (!MA->isLatestArrayKind())
continue;
if (!isCompatibleAccess(MA))
continue;
if (MA->isRead())
addArrayReadAccess(MA);

View File

@ -0,0 +1,71 @@
; RUN: opt %loadPolly -polly-delicm -analyze < %s | FileCheck -match-full-lines %s
;
; Check that PHI mapping works even in the presence of a memset whose
; zero value is used.
;
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
; Test kernel: for j in [0, 2), branch either through a memset of A[j] or
; around it, then store a PHI value (0 after the memset, 1 otherwise) to A[j].
define void @func(i8* noalias nonnull %A) {
entry:
br label %outer.for
; Loop header: j starts at 0 and the body runs while j < 2.
outer.for:
%j = phi i32 [0, %entry], [%j.inc, %outer.inc]
%j.cmp = icmp slt i32 %j, 2
br i1 %j.cmp, label %bodyA, label %outer.exit
; bodyA: compute &A[j] and pick the path to bodyC (via bodyB or directly).
bodyA:
%A_idx = getelementptr inbounds i8, i8* %A, i32 %j
; Branch condition formed by comparing two identical constants.
%cond = icmp eq i32 21, 21
br i1 %cond, label %bodyB, label %bodyC
; bodyB: the non-store write under test -- memset with fill value 0 and
; length 1 at &A[j].
bodyB:
call void @llvm.memset.p0i8.i64(i8* %A_idx, i8 0, i64 1, i32 1, i1 false)
br label %bodyC
; bodyC: %phi is 1 when arriving from bodyA and 0 when arriving from bodyB
; (i.e. after the zero memset); it is then stored to A[j].
bodyC:
%phi = phi i8 [1, %bodyA], [0, %bodyB]
; %a has no further uses in this function.
%a = load i8, i8* %A_idx
store i8 %phi, i8* %A_idx
br label %outer.inc
; Loop latch: j += 1.
outer.inc:
%j.inc = add nuw nsw i32 %j, 1
br label %outer.for
outer.exit:
br label %return
return:
ret void
}
; CHECK: Statistics {
; CHECK: Compatible overwrites: 1
; CHECK: Overwrites mapped to: 1
; CHECK: PHI scalars mapped: 1
; CHECK: }
; CHECK: After accesses {
; CHECK-NEXT: Stmt_bodyA
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: { Stmt_bodyA[i0] -> MemRef_phi__phi[] };
; CHECK-NEXT: new: { Stmt_bodyA[i0] -> MemRef_A[o0] : 1 = 0 };
; CHECK-NEXT: Stmt_bodyB
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_bodyB[i0] -> MemRef_A[i0] };
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: { Stmt_bodyB[i0] -> MemRef_phi__phi[] };
; CHECK-NEXT: new: { Stmt_bodyB[i0] -> MemRef_A[i0] };
; CHECK-NEXT: Stmt_bodyC
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: { Stmt_bodyC[i0] -> MemRef_phi__phi[] };
; CHECK-NEXT: new: { Stmt_bodyC[i0] -> MemRef_A[i0] };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_bodyC[i0] -> MemRef_A[i0] };
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_bodyC[i0] -> MemRef_A[i0] };
; CHECK-NEXT: }

View File

@ -1,8 +1,10 @@
; RUN: opt %loadPolly -polly-delicm -analyze -pass-remarks-missed=polly-delicm < %s 2>&1 | FileCheck %s
; RUN: opt %loadPolly -polly-delicm -analyze< %s | FileCheck %s
; RUN: opt %loadPolly -polly-delicm -disable-output -stats < %s 2>&1 | FileCheck %s --check-prefix=STATS
;
; void func(double *A) {
; for (int j = 0; j < 2; j += 1) { /* outer */
; memset(A[j], 0, sizeof(double));
; A[j] = 21.0;
; A[j] = 42.0;
; double phi = 0.0;
; for (int i = 0; i < 4; i += 1) /* reduction */
; phi += 4.2;
@ -11,8 +13,6 @@
; }
;
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
define void @func(double* noalias nonnull %A) {
entry:
br label %outer.preheader
@ -28,8 +28,8 @@ outer.for:
reduction.preheader:
%A_idx = getelementptr inbounds double, double* %A, i32 %j
%tmp = bitcast double* %A_idx to i8*
call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 8, i32 1, i1 false)
store double 21.0, double* %A_idx
store double 42.0, double* %A_idx
br label %reduction.for
reduction.for:
@ -68,4 +68,5 @@ return:
}
; CHECK: encountered write that is not a StoreInst: call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 8, i32 1, i1 false)
; CHECK: No modification has been made
; STATS: 1 polly-zone - Number of not zone-analyzable arrays

View File

@ -0,0 +1,60 @@
; RUN: opt %loadPolly -polly-optree -analyze < %s | FileCheck %s -match-full-lines
;
; Rematerialize a load in the presence of a non-store WRITE access.
;
; for (int j = 0; j < n; j += 1) {
; bodyA:
; double val = B[j];
;
; bodyB:
; A[j] = val;
;
; bodyC:
; memset(A, 0, 16);
; memset(B, 0, 16);
; }
;
declare void @llvm.memset.p0f64.i64(double* nocapture, i8, i64, i32, i1)
; Test kernel: for j in [0, n), load B[j] in bodyA, store it to A[j] in bodyB,
; then zero-memset the start of both A and B in bodyC.
define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) {
entry:
br label %for
; Loop header: j starts at 0 and the body runs while j < n.
for:
%j = phi i32 [0, %entry], [%j.inc, %inc]
%j.cmp = icmp slt i32 %j, %n
br i1 %j.cmp, label %bodyA, label %exit
; bodyA: val = B[j].
bodyA:
%B_idx = getelementptr inbounds double, double* %B, i32 %j
%val = load double, double* %B_idx
br label %bodyB
; bodyB: A[j] = val; %val is defined in bodyA and used here.
bodyB:
%A_idx = getelementptr inbounds double, double* %A, i32 %j
store double %val, double* %A_idx
br label %bodyC
; bodyC: the non-store writes -- memset with fill value 0 and length 16 at the
; base of A and of B, placed after the load and the store.
bodyC:
call void @llvm.memset.p0f64.i64(double* %A, i8 0, i64 16, i32 1, i1 false)
call void @llvm.memset.p0f64.i64(double* %B, i8 0, i64 16, i32 1, i1 false)
br label %inc
; Loop latch: j += 1.
inc:
%j.inc = add nuw nsw i32 %j, 1
br label %for
exit:
br label %return
return:
ret void
}
; CHECK: Statistics {
; CHECK: Known loads forwarded: 1
; CHECK: Operand trees forwarded: 1
; CHECK: Statements with forwarded operand trees: 1
; CHECK: }

View File

@ -0,0 +1,81 @@
; RUN: opt %loadPolly -polly-optree -analyze < %s | FileCheck %s -match-full-lines
;
; Rematerialize a load in the presence of a non-store WRITE access.
;
; for (int j = 0; j < n; j += 1) {
; bodyA:
; memset(A, 0, 16);
; memset(B, 0, 16);
;
; bodyB:
; double val = B[j];
;
; bodyC:
; A[j] = val;
; }
;
declare void @llvm.memset.p0f64.i64(double* nocapture, i8, i64, i32, i1)
; Test kernel: for j in [0, n), zero-memset the start of both A and B in
; bodyA, load B[j] in bodyB, then store the loaded value to A[j] in bodyC.
define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) {
entry:
br label %for
; Loop header: j starts at 0 and the body runs while j < n.
for:
%j = phi i32 [0, %entry], [%j.inc, %inc]
%j.cmp = icmp slt i32 %j, %n
br i1 %j.cmp, label %bodyA, label %exit
; bodyA: the non-store writes -- memset with fill value 0 and length 16 at the
; base of A and of B, placed before the load and the store.
bodyA:
call void @llvm.memset.p0f64.i64(double* %A, i8 0, i64 16, i32 1, i1 false)
call void @llvm.memset.p0f64.i64(double* %B, i8 0, i64 16, i32 1, i1 false)
br label %bodyB
; bodyB: val = B[j].
bodyB:
%B_idx = getelementptr inbounds double, double* %B, i32 %j
%val = load double, double* %B_idx
br label %bodyC
; bodyC: A[j] = val; %val is defined in bodyB and used here.
bodyC:
%A_idx = getelementptr inbounds double, double* %A, i32 %j
store double %val, double* %A_idx
br label %inc
; Loop latch: j += 1.
inc:
%j.inc = add nuw nsw i32 %j, 1
br label %for
exit:
br label %return
return:
ret void
}
; CHECK: Statistics {
; CHECK: Known loads forwarded: 1
; CHECK: Operand trees forwarded: 1
; CHECK: Statements with forwarded operand trees: 1
; CHECK: }
; CHECK-NEXT: After statements {
; CHECK: Stmt_bodyB
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [n] -> { Stmt_bodyB[i0] -> MemRef_B[o0] : 8i0 <= o0 <= 7 + 8i0 };
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: [n] -> { Stmt_bodyB[i0] -> MemRef_val[] };
; CHECK-NEXT: Instructions {
; CHECK-NEXT: %val = load double, double* %B_idx
; CHECK-NEXT: }
; CHECK-NEXT: Stmt_bodyC
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: null;
; CHECK-NEXT: new: [n] -> { Stmt_bodyC[i0] -> MemRef_B[8i0] };
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [n] -> { Stmt_bodyC[i0] -> MemRef_A[o0] : 8i0 <= o0 <= 7 + 8i0 };
; CHECK-NEXT: Instructions {
; CHECK-NEXT: %val = load double, double* %B_idx
; CHECK-NEXT: store double %val, double* %A_idx
; CHECK-NEXT: }
; CHECK-NEXT: }