[ZoneAlgo] Handle non-StoreInst/LoadInst MemoryAccesses including memset.
Up to now ZoneAlgo considered array element accesses by anything other than a LoadInst or StoreInst as not analyzable. This patch removes that restriction by using the unknown ValInst to describe the written content or, in the case of a zero memset, the element type's null value. Differential Revision: https://reviews.llvm.org/D37362 llvm-svn: 312630
This commit is contained in:
parent
05710a8b4f
commit
bd84ce8931
|
@ -137,6 +137,13 @@ private:
|
|||
|
||||
void addArrayReadAccess(MemoryAccess *MA);
|
||||
|
||||
/// Return the ValInst written by a (must-)write access.
|
||||
/// Yields a null map if there is no single ValInst[] the array element written
|
||||
/// to will have; addArrayWriteAccess() then substitutes the 'unknown' ValInst.
|
||||
///
/// @param MA     The write access whose written content is described.
/// @param AccRel The access relation of @p MA; presumably
///               { Domain[] -> Element[] } -- TODO confirm against callers.
|
||||
///
|
||||
/// @return { Domain[] -> ValInst[] }, or a null map if the content is unknown.
|
||||
isl::map getWrittenValue(MemoryAccess *MA, isl::map AccRel);
|
||||
|
||||
void addArrayWriteAccess(MemoryAccess *MA);
|
||||
|
||||
protected:
|
||||
|
|
|
@ -363,17 +363,6 @@ void ZoneAlgorithm::collectIncompatibleElts(ScopStmt *Stmt,
|
|||
continue;
|
||||
}
|
||||
|
||||
if (!isa<StoreInst>(MA->getAccessInstruction())) {
|
||||
DEBUG(dbgs() << "WRITE that is not a StoreInst not supported\n");
|
||||
OptimizationRemarkMissed R(PassName, "UnusualStore",
|
||||
MA->getAccessInstruction());
|
||||
R << "encountered write that is not a StoreInst: "
|
||||
<< printInstruction(MA->getAccessInstruction());
|
||||
S->getFunction().getContext().diagnose(R);
|
||||
|
||||
IncompatibleElts = IncompatibleElts.add_set(ArrayElts);
|
||||
}
|
||||
|
||||
// In region statements the order is less clear, eg. the load and store
|
||||
// might be in a boxed loop.
|
||||
if (Stmt->isRegionStmt() &&
|
||||
|
@ -432,6 +421,37 @@ void ZoneAlgorithm::addArrayReadAccess(MemoryAccess *MA) {
|
|||
}
|
||||
}
|
||||
|
||||
/// Describe the content that the write access @p MA stores.
///
/// Returns the result of makeValInst() for the stored value when it can be
/// determined (a value of the array's element type written through a
/// single-valued access relation, or a memset of constant zero). Returns a
/// null map in every other case, including any access that is not a
/// must-write.
isl::map ZoneAlgorithm::getWrittenValue(MemoryAccess *MA, isl::map AccRel) {
|
||||
// Only must-writes are described here; anything else yields a null map.
if (!MA->isMustWrite())
|
||||
return {};
|
||||
|
||||
Value *AccVal = MA->getAccessValue();
|
||||
ScopStmt *Stmt = MA->getStatement();
|
||||
Instruction *AccInst = MA->getAccessInstruction();
|
||||
|
||||
// Loop handed to makeValInst(): the loop containing the access instruction
// if the access was originally an array access, otherwise the statement's
// surrounding loop.
|
||||
auto L = MA->isOriginalArrayKind() ? LI->getLoopFor(AccInst->getParent())
|
||||
: Stmt->getSurroundingLoop();
|
||||
// Write a value to a single element: the access value exists, has the
// array's element type and each statement instance accesses one element.
if (AccVal &&
|
||||
AccVal->getType() == MA->getLatestScopArrayInfo()->getElementType() &&
|
||||
AccRel.is_single_valued())
|
||||
return makeValInst(AccVal, Stmt, L);
|
||||
|
||||
// memset(_, 0, ) is equivalent to writing the null value to all touched
|
||||
// elements. isMustWrite() ensures that all of an element's bytes are
|
||||
// overwritten. Any other memset value falls through to the null-map return.
|
||||
if (auto *Memset = dyn_cast<MemSetInst>(AccInst)) {
|
||||
auto *WrittenConstant = dyn_cast<Constant>(Memset->getValue());
|
||||
Type *Ty = MA->getLatestScopArrayInfo()->getElementType();
|
||||
if (WrittenConstant && WrittenConstant->isZeroValue()) {
|
||||
Constant *Zero = Constant::getNullValue(Ty);
|
||||
return makeValInst(Zero, Stmt, L);
|
||||
}
|
||||
}
|
||||
|
||||
// No single describable content; addArrayWriteAccess() replaces the null map
// with makeUnknownForDomain(Stmt).
return {};
|
||||
}
|
||||
|
||||
void ZoneAlgorithm::addArrayWriteAccess(MemoryAccess *MA) {
|
||||
assert(MA->isLatestArrayKind());
|
||||
assert(MA->isWrite());
|
||||
|
@ -449,10 +469,9 @@ void ZoneAlgorithm::addArrayWriteAccess(MemoryAccess *MA) {
|
|||
give(isl_union_map_add_map(AllMayWrites.take(), AccRel.copy()));
|
||||
|
||||
// { Domain[] -> ValInst[] }
|
||||
auto WriteValInstance =
|
||||
makeValInst(MA->getAccessValue(), Stmt,
|
||||
LI->getLoopFor(MA->getAccessInstruction()->getParent()),
|
||||
MA->isMustWrite());
|
||||
auto WriteValInstance = getWrittenValue(MA, AccRel);
|
||||
if (!WriteValInstance)
|
||||
WriteValInstance = makeUnknownForDomain(Stmt);
|
||||
|
||||
// { Domain[] -> [Element[] -> Domain[]] }
|
||||
auto IncludeElement = give(isl_map_curry(isl_map_domain_map(AccRel.copy())));
|
||||
|
@ -698,8 +717,6 @@ void ZoneAlgorithm::computeCommon() {
|
|||
for (auto *MA : Stmt) {
|
||||
if (!MA->isLatestArrayKind())
|
||||
continue;
|
||||
if (!isCompatibleAccess(MA))
|
||||
continue;
|
||||
|
||||
if (MA->isRead())
|
||||
addArrayReadAccess(MA);
|
||||
|
|
|
@ -0,0 +1,71 @@
|
|||
; RUN: opt %loadPolly -polly-delicm -analyze < %s | FileCheck -match-full-lines %s
|
||||
;
|
||||
; Check that PHI mapping works even in the presence of a memset whose
|
||||
; zero value is used.
|
||||
;
|
||||
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
|
||||
|
||||
; Test input: a loop over j in [0, 2) that conditionally zeroes A[j] with a
; one-byte memset (bodyB) and then stores a PHI value to A[j] (bodyC).
define void @func(i8* noalias nonnull %A) {
|
||||
entry:
|
||||
br label %outer.for
|
||||
|
||||
outer.for:
|
||||
%j = phi i32 [0, %entry], [%j.inc, %outer.inc]
|
||||
%j.cmp = icmp slt i32 %j, 2
|
||||
br i1 %j.cmp, label %bodyA, label %outer.exit
|
||||
|
||||
|
||||
bodyA:
|
||||
%A_idx = getelementptr inbounds i8, i8* %A, i32 %j
|
||||
; Compares the constant 21 with itself; the branch still has two successors.
%cond = icmp eq i32 21, 21
|
||||
br i1 %cond, label %bodyB, label %bodyC
|
||||
|
||||
bodyB:
|
||||
; Non-store write: set the single byte at A[j] to zero.
call void @llvm.memset.p0i8.i64(i8* %A_idx, i8 0, i64 1, i32 1, i1 false)
|
||||
br label %bodyC
|
||||
|
||||
bodyC:
|
||||
; 1 when arriving from bodyA, 0 when arriving from bodyB.
%phi = phi i8 [1, %bodyA], [0, %bodyB]
|
||||
%a = load i8, i8* %A_idx
|
||||
store i8 %phi, i8* %A_idx
|
||||
br label %outer.inc
|
||||
|
||||
|
||||
outer.inc:
|
||||
%j.inc = add nuw nsw i32 %j, 1
|
||||
br label %outer.for
|
||||
|
||||
outer.exit:
|
||||
br label %return
|
||||
|
||||
return:
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
; CHECK: Statistics {
|
||||
; CHECK: Compatible overwrites: 1
|
||||
; CHECK: Overwrites mapped to: 1
|
||||
; CHECK: PHI scalars mapped: 1
|
||||
; CHECK: }
|
||||
|
||||
; CHECK: After accesses {
|
||||
; CHECK-NEXT: Stmt_bodyA
|
||||
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
|
||||
; CHECK-NEXT: { Stmt_bodyA[i0] -> MemRef_phi__phi[] };
|
||||
; CHECK-NEXT: new: { Stmt_bodyA[i0] -> MemRef_A[o0] : 1 = 0 };
|
||||
; CHECK-NEXT: Stmt_bodyB
|
||||
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
|
||||
; CHECK-NEXT: { Stmt_bodyB[i0] -> MemRef_A[i0] };
|
||||
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
|
||||
; CHECK-NEXT: { Stmt_bodyB[i0] -> MemRef_phi__phi[] };
|
||||
; CHECK-NEXT: new: { Stmt_bodyB[i0] -> MemRef_A[i0] };
|
||||
; CHECK-NEXT: Stmt_bodyC
|
||||
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1]
|
||||
; CHECK-NEXT: { Stmt_bodyC[i0] -> MemRef_phi__phi[] };
|
||||
; CHECK-NEXT: new: { Stmt_bodyC[i0] -> MemRef_A[i0] };
|
||||
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
|
||||
; CHECK-NEXT: { Stmt_bodyC[i0] -> MemRef_A[i0] };
|
||||
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
|
||||
; CHECK-NEXT: { Stmt_bodyC[i0] -> MemRef_A[i0] };
|
||||
; CHECK-NEXT: }
|
|
@ -1,8 +1,10 @@
|
|||
; RUN: opt %loadPolly -polly-delicm -analyze -pass-remarks-missed=polly-delicm < %s 2>&1 | FileCheck %s
|
||||
; RUN: opt %loadPolly -polly-delicm -analyze< %s | FileCheck %s
|
||||
; RUN: opt %loadPolly -polly-delicm -disable-output -stats < %s 2>&1 | FileCheck %s --check-prefix=STATS
|
||||
;
|
||||
; void func(double *A) {
|
||||
; for (int j = 0; j < 2; j += 1) { /* outer */
|
||||
; memset(&A[j], 0, sizeof(double));
|
||||
; A[j] = 21.0;
|
||||
; A[j] = 42.0;
|
||||
; double phi = 0.0;
|
||||
; for (int i = 0; i < 4; i += 1) /* reduction */
|
||||
; phi += 4.2;
|
||||
|
@ -11,8 +13,6 @@
|
|||
; }
|
||||
;
|
||||
|
||||
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
|
||||
|
||||
define void @func(double* noalias nonnull %A) {
|
||||
entry:
|
||||
br label %outer.preheader
|
||||
|
@ -28,8 +28,8 @@ outer.for:
|
|||
|
||||
reduction.preheader:
|
||||
%A_idx = getelementptr inbounds double, double* %A, i32 %j
|
||||
%tmp = bitcast double* %A_idx to i8*
|
||||
call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 8, i32 1, i1 false)
|
||||
store double 21.0, double* %A_idx
|
||||
store double 42.0, double* %A_idx
|
||||
br label %reduction.for
|
||||
|
||||
reduction.for:
|
||||
|
@ -68,4 +68,5 @@ return:
|
|||
}
|
||||
|
||||
|
||||
; CHECK: encountered write that is not a StoreInst: call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 8, i32 1, i1 false)
|
||||
; CHECK: No modification has been made
|
||||
; STATS: 1 polly-zone - Number of not zone-analyzable arrays
|
||||
|
|
|
@ -0,0 +1,60 @@
|
|||
; RUN: opt %loadPolly -polly-optree -analyze < %s | FileCheck %s -match-full-lines
|
||||
;
|
||||
; Rematerialize a load in the presence of a non-store WRITE access.
|
||||
;
|
||||
; for (int j = 0; j < n; j += 1) {
|
||||
; bodyA:
|
||||
; double val = B[j];
|
||||
;
|
||||
; bodyB:
|
||||
; A[j] = val;
|
||||
;
|
||||
; bodyC:
|
||||
; memset(A, 0, 16);
|
||||
; memset(B, 0, 16);
|
||||
; }
|
||||
;
|
||||
|
||||
declare void @llvm.memset.p0f64.i64(double* nocapture, i8, i64, i32, i1)
|
||||
|
||||
; Test input: for each j in [0, n), bodyA loads B[j], bodyB stores it to A[j],
; and bodyC afterwards zeroes the first 16 bytes of both A and B with memset.
define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) {
|
||||
entry:
|
||||
br label %for
|
||||
|
||||
for:
|
||||
%j = phi i32 [0, %entry], [%j.inc, %inc]
|
||||
%j.cmp = icmp slt i32 %j, %n
|
||||
br i1 %j.cmp, label %bodyA, label %exit
|
||||
|
||||
bodyA:
|
||||
%B_idx = getelementptr inbounds double, double* %B, i32 %j
|
||||
%val = load double, double* %B_idx
|
||||
br label %bodyB
|
||||
|
||||
bodyB:
|
||||
%A_idx = getelementptr inbounds double, double* %A, i32 %j
|
||||
store double %val, double* %A_idx
|
||||
br label %bodyC
|
||||
|
||||
bodyC:
|
||||
; Non-store writes that come after the load/store pair.
call void @llvm.memset.p0f64.i64(double* %A, i8 0, i64 16, i32 1, i1 false)
|
||||
call void @llvm.memset.p0f64.i64(double* %B, i8 0, i64 16, i32 1, i1 false)
|
||||
br label %inc
|
||||
|
||||
inc:
|
||||
%j.inc = add nuw nsw i32 %j, 1
|
||||
br label %for
|
||||
|
||||
exit:
|
||||
br label %return
|
||||
|
||||
return:
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
; CHECK: Statistics {
|
||||
; CHECK: Known loads forwarded: 1
|
||||
; CHECK: Operand trees forwarded: 1
|
||||
; CHECK: Statements with forwarded operand trees: 1
|
||||
; CHECK: }
|
|
@ -0,0 +1,81 @@
|
|||
; RUN: opt %loadPolly -polly-optree -analyze < %s | FileCheck %s -match-full-lines
|
||||
;
|
||||
; Rematerialize a load in the presence of a non-store WRITE access.
|
||||
;
|
||||
; for (int j = 0; j < n; j += 1) {
|
||||
; bodyA:
|
||||
; memset(A, 0, 16);
|
||||
; memset(B, 0, 16);
|
||||
;
|
||||
; bodyB:
|
||||
; double val = B[j];
|
||||
;
|
||||
; bodyC:
|
||||
; A[j] = val;
|
||||
; }
|
||||
;
|
||||
|
||||
declare void @llvm.memset.p0f64.i64(double* nocapture, i8, i64, i32, i1)
|
||||
|
||||
; Test input: for each j in [0, n), bodyA first zeroes the first 16 bytes of
; both A and B with memset, then bodyB loads B[j] and bodyC stores it to A[j].
define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) {
|
||||
entry:
|
||||
br label %for
|
||||
|
||||
for:
|
||||
%j = phi i32 [0, %entry], [%j.inc, %inc]
|
||||
%j.cmp = icmp slt i32 %j, %n
|
||||
br i1 %j.cmp, label %bodyA, label %exit
|
||||
|
||||
bodyA:
|
||||
; Non-store writes that come before the load/store pair.
call void @llvm.memset.p0f64.i64(double* %A, i8 0, i64 16, i32 1, i1 false)
|
||||
call void @llvm.memset.p0f64.i64(double* %B, i8 0, i64 16, i32 1, i1 false)
|
||||
br label %bodyB
|
||||
|
||||
bodyB:
|
||||
%B_idx = getelementptr inbounds double, double* %B, i32 %j
|
||||
%val = load double, double* %B_idx
|
||||
br label %bodyC
|
||||
|
||||
bodyC:
|
||||
%A_idx = getelementptr inbounds double, double* %A, i32 %j
|
||||
store double %val, double* %A_idx
|
||||
br label %inc
|
||||
|
||||
inc:
|
||||
%j.inc = add nuw nsw i32 %j, 1
|
||||
br label %for
|
||||
|
||||
exit:
|
||||
br label %return
|
||||
|
||||
return:
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
; CHECK: Statistics {
|
||||
; CHECK: Known loads forwarded: 1
|
||||
; CHECK: Operand trees forwarded: 1
|
||||
; CHECK: Statements with forwarded operand trees: 1
|
||||
; CHECK: }
|
||||
|
||||
; CHECK-NEXT: After statements {
|
||||
; CHECK: Stmt_bodyB
|
||||
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
|
||||
; CHECK-NEXT: [n] -> { Stmt_bodyB[i0] -> MemRef_B[o0] : 8i0 <= o0 <= 7 + 8i0 };
|
||||
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
|
||||
; CHECK-NEXT: [n] -> { Stmt_bodyB[i0] -> MemRef_val[] };
|
||||
; CHECK-NEXT: Instructions {
|
||||
; CHECK-NEXT: %val = load double, double* %B_idx
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: Stmt_bodyC
|
||||
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
|
||||
; CHECK-NEXT: null;
|
||||
; CHECK-NEXT: new: [n] -> { Stmt_bodyC[i0] -> MemRef_B[8i0] };
|
||||
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
|
||||
; CHECK-NEXT: [n] -> { Stmt_bodyC[i0] -> MemRef_A[o0] : 8i0 <= o0 <= 7 + 8i0 };
|
||||
; CHECK-NEXT: Instructions {
|
||||
; CHECK-NEXT: %val = load double, double* %B_idx
|
||||
; CHECK-NEXT: store double %val, double* %A_idx
|
||||
; CHECK-NEXT: }
|
||||
; CHECK-NEXT: }
|
Loading…
Reference in New Issue