[RewriteStatepointsForGC] Reduce the number of new instructions for base pointers

When computing base pointers, we introduce new instructions to propagate the base of existing instructions which might not be bases. However, the algorithm doesn't make any effort to recognize when the new instruction to be inserted is the same as an existing one already in the IR. Since this is happening immediately before rewriting, we don't really have a chance to fix it after the pass runs without teaching loop passes about statepoints.

I'm really not thrilled with this patch. I've rewritten it 4 different ways now, but this is the best I've come up with. The case where the new instruction is just the original base defining value could be merged into the existing algorithm with some complexity. The problem is that we might have something like an extractelement from a phi of two vectors. It may be trivially obvious that the base of the 0th element is an existing instruction, but I can't see how to make the algorithm itself figure that out. Thus, I resort to the call to SimplifyInstruction instead.

Note that we can only adjust the instructions we've inserted ourselves. The live sets are still being tracked in side structures at this point in the code. We can't easily muck with instructions which might be in them. Long term, I'm really thinking we need to materialize the live pointer sets explicitly in the IR somehow rather than using side structures to track them.

Differential Revision: http://reviews.llvm.org/D12004

llvm-svn: 246133
This commit is contained in:
Philip Reames 2015-08-27 01:02:28 +00:00
parent e0f400feaa
commit abcdc5e3a8
5 changed files with 79 additions and 21 deletions

View File

@ -14,6 +14,7 @@
#include "llvm/Pass.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/Statistic.h"
@ -1009,6 +1010,62 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
}
}
// Now that we're done with the algorithm, see if we can optimize the
// results slightly by reducing the number of new instructions needed.
// Arguably, this should be integrated into the algorithm above, but
// doing as a post process step is easier to reason about for the moment.
DenseMap<Value *, Value *> ReverseMap;
SmallPtrSet<Instruction *, 16> NewInsts;
SmallSetVector<Instruction *, 16> Worklist;
for (auto Item : states) {
Value *V = Item.first;
Value *Base = Item.second.getBase();
assert(V && Base);
assert(!isKnownBaseResult(V) && "why did it get added?");
assert(isKnownBaseResult(Base) &&
"must be something we 'know' is a base pointer");
if (!Item.second.isConflict())
continue;
ReverseMap[Base] = V;
if (auto *BaseI = dyn_cast<Instruction>(Base)) {
NewInsts.insert(BaseI);
Worklist.insert(BaseI);
}
}
auto PushNewUsers = [&](Instruction *I) {
for (User *U : I->users())
if (auto *UI = dyn_cast<Instruction>(U))
if (NewInsts.count(UI))
Worklist.insert(UI);
};
const DataLayout &DL = cast<Instruction>(def)->getModule()->getDataLayout();
while (!Worklist.empty()) {
Instruction *BaseI = Worklist.pop_back_val();
Value *Bdv = ReverseMap[BaseI];
if (auto *BdvI = dyn_cast<Instruction>(Bdv))
if (BaseI->isIdenticalTo(BdvI)) {
DEBUG(dbgs() << "Identical Base: " << *BaseI << "\n");
PushNewUsers(BaseI);
BaseI->replaceAllUsesWith(Bdv);
BaseI->eraseFromParent();
states[Bdv] = BDVState(BDVState::Conflict, Bdv);
NewInsts.erase(BaseI);
ReverseMap.erase(BaseI);
continue;
}
if (Value *V = SimplifyInstruction(BaseI, DL)) {
DEBUG(dbgs() << "Base " << *BaseI << " simplified to " << *V << "\n");
PushNewUsers(BaseI);
BaseI->replaceAllUsesWith(V);
BaseI->eraseFromParent();
states[Bdv] = BDVState(BDVState::Conflict, V);
NewInsts.erase(BaseI);
ReverseMap.erase(BaseI);
continue;
}
}
// Cache all of our results so we can cheaply reuse them
// NOTE: This is actually two caches: one of the base defining value
// relation and one of the base pointer relation! FIXME
@ -1016,7 +1073,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
Value *v = item.first;
Value *base = item.second.getBase();
assert(v && base);
assert(!isKnownBaseResult(v) && "why did it get added?");
if (TraceLSP) {
std::string fromstr =
@ -1028,8 +1084,6 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
<< " to: " << (base->hasName() ? base->getName() : "") << "\n";
}
assert(isKnownBaseResult(base) &&
"must be something we 'know' is a base pointer");
if (cache.count(v)) {
// Once we transition from the BDV relation being store in the cache to
// the base relation being stored, it must be stable

View File

@ -1,7 +1,7 @@
; RUN: opt %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
; CHECK: derived %obj_to_consume base %obj_to_consume.base
; CHECK: derived %obj_to_consume base %obj_to_consume
declare void @foo()
declare i64 addrspace(1)* @generate_obj()
@ -33,7 +33,6 @@ dest_c:
merge:
; CHECK: merge:
; CHECK: %obj_to_consume.base = phi i64 addrspace(1)* [ %obj2, %dest_a ], [ null, %dest_b ], [ null, %dest_c ]
; CHECK: %obj_to_consume = phi i64 addrspace(1)* [ %obj2, %dest_a ], [ null, %dest_b ], [ null, %dest_c ]
%obj_to_consume = phi i64 addrspace(1)* [ %obj2, %dest_a ], [ null, %dest_b ], [ null, %dest_c ]

View File

@ -80,7 +80,6 @@ loop: ; preds = %loop, %entry
; we'd have commoned these, but that's a missed optimization, not correctness.
; CHECK-DAG: [ [[DISCARD:%.*.base.relocated.casted]], %loop ]
; CHECK-NOT: extra.base
; CHECK: next.base = select
; CHECK: next = select
; CHECK: extra2.base = select
; CHECK: extra2 = select
@ -95,6 +94,24 @@ loop: ; preds = %loop, %entry
br label %loop
}
define i64 addrspace(1)* @test3(i1 %cnd, i64 addrspace(1)* %obj,
i64 addrspace(1)* %obj2)
gc "statepoint-example" {
; CHECK-LABEL: @test3
entry:
br i1 %cnd, label %merge, label %taken
taken:
br label %merge
merge:
; CHECK-LABEL: merge:
; CHECK-NEXT: %bdv = phi
; CHECK-NEXT: gc.statepoint
%bdv = phi i64 addrspace(1)* [ %obj, %entry ], [ %obj2, %taken ]
%safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i64 addrspace(1)* %bdv
}
declare void @foo()
declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
declare i32 @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...)

View File

@ -40,13 +40,10 @@ untaken2:
br label %merge2
merge2:
; CHECK-LABEL: merge2:
; CHECK: %obj.base = phi i64 addrspace(1)*
; CHECK: %obj = phi i64 addrspace(1)*
; CHECK: statepoint
; CHECK-NEXT: %obj = phi i64 addrspace(1)*
; CHECK-NEXT: statepoint
; CHECK: gc.relocate
; CHECK-DAG: ; (%obj.base, %obj)
; CHECK: gc.relocate
; CHECK-DAG: ; (%obj.base, %obj.base)
; CHECK-DAG: ; (%obj, %obj)
%obj = phi i64 addrspace(1)* [%obj0, %taken2], [%obj1, %untaken2]
%safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
ret i64 addrspace(1)* %obj

View File

@ -121,9 +121,6 @@ define <2 x i64 addrspace(1)*> @test6(i1 %cnd, <2 x i64 addrspace(1)*>* %ptr)
; CHECK-LABEL: test6
; CHECK-LABEL: merge:
; CHECK-NEXT: = phi
; CHECK-NEXT: = phi
; CHECK-NEXT: extractelement
; CHECK-NEXT: extractelement
; CHECK-NEXT: extractelement
; CHECK-NEXT: extractelement
; CHECK-NEXT: gc.statepoint
@ -131,12 +128,6 @@ define <2 x i64 addrspace(1)*> @test6(i1 %cnd, <2 x i64 addrspace(1)*>* %ptr)
; CHECK-NEXT: bitcast
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: bitcast
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: bitcast
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: bitcast
; CHECK-NEXT: insertelement
; CHECK-NEXT: insertelement
; CHECK-NEXT: insertelement
; CHECK-NEXT: insertelement
; CHECK-NEXT: ret <2 x i64 addrspace(1)*>