BlockGenerator: Generate synthesisable instructions only on-demand

Instructions which we can synthesize from a SCEV expression are not generated
directly, but only when they are used as an operand of another instruction. This
avoids generating unnecessary instructions and works more reliably than first
inserting them and then deleting them later on.

Suggested-by: Johannes Doerfert <doerfert@cs.uni-saarland.de>

Differential Revision: http://reviews.llvm.org/D13208

llvm-svn: 248712
This commit is contained in:
Tobias Grosser 2015-09-28 13:47:50 +00:00
parent 8ff61200f4
commit 28b9a14b07
4 changed files with 12 additions and 53 deletions

View File

@ -254,8 +254,7 @@ void BlockGenerator::copyInstruction(ScopStmt &Stmt, const Instruction *Inst,
Loop *L = getLoopForInst(Inst);
if ((Stmt.isBlockStmt() || !Stmt.getRegion()->contains(L)) &&
canSynthesize(Inst, &LI, &SE, &Stmt.getParent()->getRegion())) {
Value *NewValue = getNewValue(Stmt, Inst, BBMap, LTS, L);
BBMap[Inst] = NewValue;
// Synthesizable statements will be generated on-demand.
return;
}
@ -285,28 +284,6 @@ void BlockGenerator::copyInstruction(ScopStmt &Stmt, const Instruction *Inst,
copyInstScalar(Stmt, Inst, BBMap, LTS);
}
/// @brief Remove trivially dead instructions from BB
///
/// This function drops trivially dead instructions from a basic block. It
/// on purpose does _not_ recurse into other BBs even if the deletion of
/// instructions in this basic block can make instructions in other basic blocks
/// trivially dead.
///
/// The block is visited backwards, from its last instruction to its first.
///
/// @param BB The basic block whose trivially dead instructions are erased.
static void simplifyInstsInBlockOnly(BasicBlock *BB) {
// BI starts at the last instruction of BB; BE marks the first one.
// NOTE(review): decrementing BB->end() presumably requires BB to be
// non-empty -- confirm that callers guarantee this.
auto BI = --BB->end(), BE = BB->begin();
bool Exit = false;
while (!Exit) {
// Remember the current instruction and move BI to its predecessor before
// a possible erase, so BI never refers to an erased instruction.
auto ToRemove = BI;
if (BI != BE)
BI--;
else
// The first instruction has been reached: this is the final iteration.
Exit = true;
if (!isInstructionTriviallyDead(ToRemove))
continue;
ToRemove->eraseFromParent();
}
}
void BlockGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
isl_id_to_ast_expr *NewAccesses) {
assert(Stmt.isBlockStmt() &&
@ -316,16 +293,6 @@ void BlockGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
BasicBlock *BB = Stmt.getBasicBlock();
copyBB(Stmt, BB, BBMap, LTS, NewAccesses);
auto CopyBB = Builder.GetInsertBlock();
// Delete trivially dead instructions in CopyBB, but not in any other BB.
// Only for copyBB we know that there will _never_ be any future uses of
// instructions that have no use after copyBB has finished. Other instructions
// in the AST that have been generated by IslNodeBuilder may look dead at
// the moment, but may possibly still be referenced by GlobalMaps. If we
// delete them now, later uses would break surprisingly.
simplifyInstsInBlockOnly(CopyBB);
Builder.SetInsertPoint(CopyBB->getTerminator());
}
BasicBlock *BlockGenerator::splitBB(BasicBlock *BB) {
@ -1116,15 +1083,6 @@ void RegionGenerator::copyStmt(ScopStmt &Stmt, LoopToScevMapT &LTS,
LTS[L] = SE.getUnknown(LoopPHI);
}
// Delete trivially dead instructions in CopyBB, but not in any other BB.
// Only for copyBB we know that there will _never_ be any future uses of
// instructions that have no use after copyBB has finished. Other instructions
// in the AST that have been generated by IslNodeBuilder may look dead at
// the moment, but may possibly still be referenced by GlobalMaps. If we
// delete them now, later uses would break surprisingly.
for (auto *BB : SeenBlocks)
simplifyInstsInBlockOnly(BlockMap[BB]);
// Reset the old insert point for the build.
Builder.SetInsertPoint(ExitBBCopy->begin());
}

View File

@ -25,11 +25,11 @@
; IR: %polly.access.polly.subfunc.arg.A = getelementptr float, float* %polly.subfunc.arg.A, i64 %polly.access.add.polly.subfunc.arg.A
; IR: %tmp10_p_scalar_ = load float, float* %polly.access.polly.subfunc.arg.A, align 4, !alias.scope !0, !noalias !2, !llvm.mem.parallel_loop_access !3
; IR: %polly.access.mul.polly.subfunc.arg.A9 = mul i64 %polly.indvar, %polly.subfunc.arg.m
; IR: %polly.access.mul.polly.subfunc.arg.A8 = mul i64 %polly.indvar, %polly.subfunc.arg.m
; IR: %7 = add nsw i64 %polly.indvar5, 43
; IR: %polly.access.add.polly.subfunc.arg.A10 = add i64 %polly.access.mul.polly.subfunc.arg.A9, %7
; IR: %polly.access.polly.subfunc.arg.A11 = getelementptr float, float* %polly.subfunc.arg.A, i64 %polly.access.add.polly.subfunc.arg.A10
; IR: store float %p_tmp11, float* %polly.access.polly.subfunc.arg.A11, align 4, !alias.scope !0, !noalias !2, !llvm.mem.parallel_
; IR: %polly.access.add.polly.subfunc.arg.A9 = add i64 %polly.access.mul.polly.subfunc.arg.A8, %7
; IR: %polly.access.polly.subfunc.arg.A10 = getelementptr float, float* %polly.subfunc.arg.A, i64 %polly.access.add.polly.subfunc.arg.A9
; IR: store float %p_tmp11, float* %polly.access.polly.subfunc.arg.A10, align 4, !alias.scope !0, !noalias !2, !llvm.mem.parallel_
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define void @new_multidim_access(i64 %n, i64 %m, float* %A) {

View File

@ -28,7 +28,7 @@
; each value of i to indeed be mapped to a value.
;
; CHECK: %pexp.p_div_q = udiv i64 %polly.indvar, 127
; CHECK: %polly.access.B8 = getelementptr float, float* %B, i64 %pexp.p_div_q
; CHECK: %polly.access.B7 = getelementptr float, float* %B, i64 %pexp.p_div_q
; #define floord(n,d) ((n < 0) ? (n - d + 1) : n) / d
; A[p + 127 * floord(-p - 1, 127) + 127]
@ -42,11 +42,11 @@
; CHECK: %22 = mul nsw i64 127, %pexp.fdiv_q.4
; CHECK: %23 = add nsw i64 %p, %22
; CHECK: %24 = add nsw i64 %23, 127
; CHECK: %polly.access.A10 = getelementptr float, float* %A, i64 %24
; CHECK: %polly.access.A8 = getelementptr float, float* %A, i64 %24
; A[p / 127]
; CHECK: %pexp.div = sdiv exact i64 %p, 127
; CHECK: %polly.access.B12 = getelementptr float, float* %B, i64 %pexp.div
; CHECK: %polly.access.B9 = getelementptr float, float* %B, i64 %pexp.div
; A[i % 128]
; POW2: %pexp.pdiv_r = urem i64 %polly.indvar, 128
@ -54,7 +54,7 @@
; A[floor(i / 128)]
; POW2: %pexp.p_div_q = udiv i64 %polly.indvar, 128
; POW2: %polly.access.B8 = getelementptr float, float* %B, i64 %pexp.p_div_q
; POW2: %polly.access.B7 = getelementptr float, float* %B, i64 %pexp.p_div_q
; #define floord(n,d) ((n < 0) ? (n - d + 1) : n) / d
; A[p + 128 * floord(-p - 1, 128) + 128]
@ -64,11 +64,11 @@
; POW2: %22 = mul nsw i64 128, %polly.fdiv_q.shr
; POW2: %23 = add nsw i64 %p, %22
; POW2: %24 = add nsw i64 %23, 128
; POW2: %polly.access.A10 = getelementptr float, float* %A, i64 %24
; POW2: %polly.access.A8 = getelementptr float, float* %A, i64 %24
; A[p / 128]
; POW2: %pexp.div = sdiv exact i64 %p, 128
; POW2: %polly.access.B12 = getelementptr float, float* %B, i64 %pexp.div
; POW2: %polly.access.B9 = getelementptr float, float* %B, i64 %pexp.div
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

View File

@ -13,6 +13,7 @@ loop:
br i1 %cond0, label %branch1, label %backedge
; CHECK-LABEL: polly.stmt.loop:
; CHECK-NEXT: %polly.subregion.iv = phi i32 [ 0, %polly.stmt.loop.entry ]
; CHECK-NEXT: %p_val0 = fadd float 1.000000e+00, 2.000000e+00
; CHECK-NEXT: %p_val1 = fadd float 1.000000e+00, 2.000000e+00
; CHECK-NEXT: %p_val2 = fadd float 1.000000e+00, 2.000000e+00