[ZoneAlgo/ForwardOpTree] Normalize PHIs to their known incoming values.

Represent PHIs by their incoming values instead of an opaque value of
themselves. This allows ForwardOpTree to "look through" the PHIs and
forward their incoming values, since forwarding PHIs themselves is
currently not supported.

This is particularly useful to cope with PHIs inserted by GVN LoadPRE.
The incoming values all resolve to a load from a single array element,
which can then be forwarded.

It should in theory also reduce spurious conflicts in value mapping
(DeLICM), but I have not found a profitable case yet, so it is not
included here.

To avoid computing a transitive closure, and the potentially necessary
overapproximation of its result, PHIs that may reference themselves are
excluded from normalization and keep their opaque self-representation.
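
Concretely, the normalization is a composition of relations over ValInsts.
The following standalone sketch (using the isl C API; the statement and
value names are invented for this description and are not taken from the
patch) shows how the use of a PHI is rewritten by such a normalization map:

#include <isl/ctx.h>
#include <isl/union_map.h>

int main() {
  isl_ctx *Ctx = isl_ctx_alloc();

  // ValInst of a use of %phi: each read instance uses the PHI's value.
  isl_union_map *ValInst = isl_union_map_read_from_str(
      Ctx, "{ Read[i] -> [Stmt[i] -> phi[]] }");

  // Normalization map: %phi is %val1 in the first iteration, %val2 afterwards.
  isl_union_map *Normalize = isl_union_map_read_from_str(
      Ctx, "{ [Stmt[i] -> phi[]] -> [Stmt[0] -> val1[]] : i = 0; "
           "[Stmt[i] -> phi[]] -> [Stmt[i] -> val2[]] : i > 0 }");

  // Look through the PHI by composing the two relations.
  // Expected: { Read[0] -> [Stmt[0] -> val1[]];
  //             Read[i] -> [Stmt[i] -> val2[]] : i > 0 }
  isl_union_map *Normalized = isl_union_map_apply_range(ValInst, Normalize);
  isl_union_map_dump(Normalized);

  isl_union_map_free(Normalized);
  isl_ctx_free(Ctx);
  return 0;
}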

Differential Revision: https://reviews.llvm.org/D39333

llvm-svn: 317008
Michael Kruse 2017-10-31 16:11:46 +00:00
parent 616cd99194
commit 68821a8b91
8 changed files with 786 additions and 55 deletions


@@ -15,6 +15,8 @@
#define POLLY_ZONEALGO_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "isl/isl-noexceptions.h"
#include <memory>
@@ -22,12 +24,14 @@ namespace llvm {
class Value;
class LoopInfo;
class Loop;
class PHINode;
} // namespace llvm
namespace polly {
class Scop;
class ScopStmt;
class MemoryAccess;
class ScopArrayInfo;
/// Return only the mappings that map to known values.
///
@@ -108,6 +112,47 @@ protected:
/// { Element[] }
isl::union_set CompatibleElts;
/// List of PHIs that may transitively refer to themselves.
///
/// Computing them would require a polyhedral transitive closure operation,
/// for which isl may only return an approximation. For correctness, we always
/// require an exact result. Hence, we exclude such PHIs.
llvm::SmallPtrSet<llvm::PHINode *, 4> RecursivePHIs;
/// PHIs that have been computed.
///
/// Computed PHIs are replaced by their incoming values using #NormalizeMap.
llvm::DenseSet<llvm::PHINode *> ComputedPHIs;
/// For computed PHIs, contains the ValInst they stand for.
///
/// To show an example, assume the following PHINode:
///
/// Stmt:
/// %phi = phi double [%val1, %bb1], [%val2, %bb2]
///
/// Its ValInst is:
///
/// { [Stmt[i] -> phi[]] }
///
/// The value %phi will be either %val1 or %val2, depending on whether in
/// iteration i %bb1 or %bb2 has been executed before. In SCoPs, this can be
/// determined at compile-time, and the result stored in #NormalizeMap. For
/// the previous example, it could be:
///
/// { [Stmt[i] -> phi[]] -> [Stmt[0] -> val1[]] : i = 0;
/// [Stmt[i] -> phi[]] -> [Stmt[i] -> val2[]] : i > 0 }
///
/// Only the ValInsts of PHIs in #ComputedPHIs are present in this map. Other
/// values are assumed to represent themselves; this avoids adding lots of
/// identity entries to this map.
///
/// { PHIValInst[] -> IncomingValInst[] }
isl::union_map NormalizeMap;
/// Cache for computePerPHI(const ScopArrayInfo *)
llvm::SmallDenseMap<llvm::PHINode *, isl::union_map> PerPHIMaps;
/// Prepare the object before computing the zones of @p S.
///
/// @param PassName Name of the pass using this analysis.
@@ -142,7 +187,7 @@ private:
/// have.
///
/// @return { ValInst[] }
isl::map getWrittenValue(MemoryAccess *MA, isl::map AccRel);
isl::union_map getWrittenValue(MemoryAccess *MA, isl::map AccRel);
void addArrayWriteAccess(MemoryAccess *MA);
@@ -151,6 +196,17 @@ protected:
isl::union_map makeEmptyUnionMap() const;
/// For each 'execution' of a PHINode, get the incoming block that was
/// executed before.
///
/// For each PHI instance we can directly determine which was the incoming
/// block, and hence derive which value the PHI has.
///
/// @param SAI The ScopArrayInfo representing the PHI's storage.
///
/// @return { DomainPHIRead[] -> DomainPHIWrite[] }
isl::union_map computePerPHI(const polly::ScopArrayInfo *SAI);
/// Find the array elements that can be used for zone analysis.
void collectCompatibleElts();
@@ -244,6 +300,15 @@ protected:
isl::map makeValInst(llvm::Value *Val, ScopStmt *UserStmt, llvm::Loop *Scope,
bool IsCertain = true);
/// Create and normalize a ValInst.
///
/// @see makeValInst
/// @see normalizeValInst
/// @see #NormalizeMap
isl::union_map makeNormalizedValInst(llvm::Value *Val, ScopStmt *UserStmt,
llvm::Loop *Scope,
bool IsCertain = true);
/// Return whether @p MA can be used for transformations (e.g. OpTree load
/// forwarding, DeLICM mapping).
bool isCompatibleAccess(MemoryAccess *MA);
@@ -251,9 +316,29 @@ protected:
/// Compute the different zones.
void computeCommon();
/// Compute the normalization map that replaces PHIs by their incoming
/// values.
///
/// @see #NormalizeMap
void computeNormalizedPHIs();
/// Print the current state of all MemoryAccesses to @p OS.
void printAccesses(llvm::raw_ostream &OS, int Indent = 0) const;
/// Is @p MA a PHI READ access that can be normalized?
///
/// @see #NormalizeMap
bool isNormalizable(MemoryAccess *MA);
/// @{
/// Determine whether the argument does not map to any computed PHI. Those
/// should have been replaced by their incoming values.
///
/// @see #NormalizeMap
bool isNormalized(isl::map Map);
bool isNormalized(isl::union_map Map);
/// @}
public:
/// Return the SCoP this object is analyzing.
Scop *getScop() const { return S; }


@@ -660,52 +660,6 @@ private:
return std::make_pair(DefUses, Lifetime);
}
/// For each 'execution' of a PHINode, get the incoming block that was
/// executed before.
///
/// For each PHI instance we can directly determine which was the incoming
/// block, and hence derive which value the PHI has.
///
/// @param SAI The ScopArrayInfo representing the PHI's storage.
///
/// @return { DomainPHIRead[] -> DomainPHIWrite[] }
isl::union_map computePerPHI(const ScopArrayInfo *SAI) {
assert(SAI->isPHIKind());
// { DomainPHIWrite[] -> Scatter[] }
auto PHIWriteScatter = makeEmptyUnionMap();
// Collect all incoming block timepoints.
for (auto *MA : S->getPHIIncomings(SAI)) {
auto Scatter = getScatterFor(MA);
PHIWriteScatter =
give(isl_union_map_add_map(PHIWriteScatter.take(), Scatter.take()));
}
// { DomainPHIRead[] -> Scatter[] }
auto PHIReadScatter = getScatterFor(S->getPHIRead(SAI));
// { DomainPHIRead[] -> Scatter[] }
auto BeforeRead = beforeScatter(PHIReadScatter, true);
// { Scatter[] }
auto WriteTimes = singleton(
give(isl_union_map_range(PHIWriteScatter.copy())), ScatterSpace);
// { DomainPHIRead[] -> Scatter[] }
auto PHIWriteTimes =
give(isl_map_intersect_range(BeforeRead.take(), WriteTimes.take()));
auto LastPerPHIWrites = give(isl_map_lexmax(PHIWriteTimes.take()));
// { DomainPHIRead[] -> DomainPHIWrite[] }
auto Result = give(isl_union_map_apply_range(
isl_union_map_from_map(LastPerPHIWrites.take()),
isl_union_map_reverse(PHIWriteScatter.take())));
assert(isl_union_map_is_single_valued(Result.keep()) == isl_bool_true);
assert(isl_union_map_is_injective(Result.keep()) == isl_bool_true);
return Result;
}
/// Try to map a MemoryKind::Value to a given array element.
///
/// @param SAI Representation of the scalar's memory to map.


@@ -51,6 +51,11 @@
cl::desc("Analyze array contents for load forwarding"),
cl::cat(PollyCategory), cl::init(true), cl::Hidden);
static cl::opt<bool>
NormalizePHIs("polly-optree-normalize-phi",
cl::desc("Replace PHIs by their incoming values"),
cl::cat(PollyCategory), cl::init(false), cl::Hidden);
static cl::opt<unsigned>
MaxOps("polly-optree-max-ops",
cl::desc("Maximum number of ISL operations to invest for known "
@@ -280,16 +285,19 @@ public:
IslQuotaScope QuotaScope = MaxOpGuard.enter();
computeCommon();
if (NormalizePHIs)
computeNormalizedPHIs();
Known = computeKnown(true, true);
// Preexisting ValInsts use the known content analysis of themselves.
Translator = makeIdentityMap(Known.range(), false);
}
if (!Known || !Translator) {
if (!Known || !Translator || !NormalizeMap) {
assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota);
Known = nullptr;
Translator = nullptr;
NormalizeMap = nullptr;
DEBUG(dbgs() << "Known analysis exceeded max_operations\n");
return false;
}
@@ -462,6 +470,7 @@ public:
// { DomainDef[] -> ValInst[] }
isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
assert(isNormalized(ExpectedVal) && "LoadInsts are always normalized");
// { DomainTarget[] -> ValInst[] }
isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);
@@ -578,7 +587,7 @@ public:
}
// { DomainDef[] -> ValInst[] }
isl::union_map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop);
isl::union_map ExpectedVal = makeNormalizedValInst(Inst, UseStmt, UseLoop);
// { DomainTarget[] -> ValInst[] }
isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget);


@@ -160,6 +160,9 @@
STATISTIC(NumIncompatibleArrays, "Number of not zone-analyzable arrays");
STATISTIC(NumCompatibleArrays, "Number of zone-analyzable arrays");
STATISTIC(NumRecursivePHIs, "Number of recursive PHIs");
STATISTIC(NumNormalizablePHIs, "Number of normalizable PHIs");
STATISTIC(NumPHINormalizations, "Number of executed PHI normalizations");
using namespace polly;
using namespace llvm;
@@ -409,7 +412,8 @@ void ZoneAlgorithm::addArrayReadAccess(MemoryAccess *MA) {
}
}
isl::map ZoneAlgorithm::getWrittenValue(MemoryAccess *MA, isl::map AccRel) {
isl::union_map ZoneAlgorithm::getWrittenValue(MemoryAccess *MA,
isl::map AccRel) {
if (!MA->isMustWrite())
return {};
@@ -423,7 +427,7 @@ isl::map ZoneAlgorithm::getWrittenValue(MemoryAccess *MA, isl::map AccRel) {
if (AccVal &&
AccVal->getType() == MA->getLatestScopArrayInfo()->getElementType() &&
AccRel.is_single_valued().is_true())
return makeValInst(AccVal, Stmt, L);
return makeNormalizedValInst(AccVal, Stmt, L);
// memset(_, '0', ) is equivalent to writing the null value to all touched
// elements. isMustWrite() ensures that all of an element's bytes are
@@ -433,7 +437,7 @@ isl::map ZoneAlgorithm::getWrittenValue(MemoryAccess *MA, isl::map AccRel) {
Type *Ty = MA->getLatestScopArrayInfo()->getElementType();
if (WrittenConstant && WrittenConstant->isZeroValue()) {
Constant *Zero = Constant::getNullValue(Ty);
return makeValInst(Zero, Stmt, L);
return makeNormalizedValInst(Zero, Stmt, L);
}
}
@@ -455,7 +459,7 @@ void ZoneAlgorithm::addArrayWriteAccess(MemoryAccess *MA) {
AllMayWrites = AllMayWrites.add_map(AccRel);
// { Domain[] -> ValInst[] }
isl::map WriteValInstance = getWrittenValue(MA, AccRel);
isl::union_map WriteValInstance = getWrittenValue(MA, AccRel);
if (!WriteValInstance)
WriteValInstance = makeUnknownForDomain(Stmt);
@@ -463,9 +467,90 @@ void ZoneAlgorithm::addArrayWriteAccess(MemoryAccess *MA) {
isl::map IncludeElement = AccRel.domain_map().curry();
// { [Element[] -> DomainWrite[]] -> ValInst[] }
isl::map EltWriteValInst = WriteValInstance.apply_domain(IncludeElement);
isl::union_map EltWriteValInst =
WriteValInstance.apply_domain(IncludeElement);
AllWriteValInst = AllWriteValInst.add_map(EltWriteValInst);
AllWriteValInst = AllWriteValInst.unite(EltWriteValInst);
}
/// Return whether @p PHI refers (also transitively through other PHIs) to
/// itself.
///
/// loop:
/// %phi1 = phi [0, %preheader], [%phi1, %loop]
/// br i1 %c, label %loop, label %exit
///
/// exit:
/// %phi2 = phi [%phi1, %loop]
///
/// In this example, %phi1 is recursive, but %phi2 is not.
static bool isRecursivePHI(const PHINode *PHI) {
SmallVector<const PHINode *, 8> Worklist;
SmallPtrSet<const PHINode *, 8> Visited;
Worklist.push_back(PHI);
while (!Worklist.empty()) {
const PHINode *Cur = Worklist.pop_back_val();
if (Visited.count(Cur))
continue;
Visited.insert(Cur);
for (const Use &Incoming : Cur->incoming_values()) {
Value *IncomingVal = Incoming.get();
auto *IncomingPHI = dyn_cast<PHINode>(IncomingVal);
if (!IncomingPHI)
continue;
if (IncomingPHI == PHI)
return true;
Worklist.push_back(IncomingPHI);
}
}
return false;
}
isl::union_map ZoneAlgorithm::computePerPHI(const ScopArrayInfo *SAI) {
// TODO: If the PHI has an incoming block from before the SCoP, it is not
// represented in any ScopStmt.
auto *PHI = cast<PHINode>(SAI->getBasePtr());
auto It = PerPHIMaps.find(PHI);
if (It != PerPHIMaps.end())
return It->second;
assert(SAI->isPHIKind());
// { DomainPHIWrite[] -> Scatter[] }
isl::union_map PHIWriteScatter = makeEmptyUnionMap();
// Collect all incoming block timepoints.
for (MemoryAccess *MA : S->getPHIIncomings(SAI)) {
isl::map Scatter = getScatterFor(MA);
PHIWriteScatter = PHIWriteScatter.add_map(Scatter);
}
// { DomainPHIRead[] -> Scatter[] }
isl::map PHIReadScatter = getScatterFor(S->getPHIRead(SAI));
// { DomainPHIRead[] -> Scatter[] }
isl::map BeforeRead = beforeScatter(PHIReadScatter, true);
// { Scatter[] }
isl::set WriteTimes = singleton(PHIWriteScatter.range(), ScatterSpace);
// { DomainPHIRead[] -> Scatter[] }
isl::map PHIWriteTimes = BeforeRead.intersect_range(WriteTimes);
isl::map LastPerPHIWrites = PHIWriteTimes.lexmax();
// { DomainPHIRead[] -> DomainPHIWrite[] }
isl::union_map Result =
isl::union_map(LastPerPHIWrites).apply_range(PHIWriteScatter.reverse());
assert(!Result.is_single_valued().is_false());
assert(!Result.is_injective().is_false());
PerPHIMaps.insert({PHI, Result});
return Result;
}
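// ---------------------------------------------------------------------------
// Illustrative sketch (not part of this patch): the lexmax step above, shown
// on made-up schedules with the isl C API. The incoming writes happen in a
// preheader (time [0]) and in the latch of iteration i (time [1 + i]); the
// PHI read of iteration i also happens at time [1 + i] and must see the
// latest write before it. All names and maps are invented for illustration.
#include <isl/ctx.h>
#include <isl/map.h>
#include <isl/set.h>
#include <isl/union_map.h>
#include <isl/union_set.h>

int main() {
  isl_ctx *Ctx = isl_ctx_alloc();

  // { DomainPHIWrite[] -> Scatter[] }
  isl_union_map *PHIWriteScatter = isl_union_map_read_from_str(
      Ctx, "{ Preheader[] -> [0]; Latch[i] -> [1 + i] : 0 <= i < 4 }");

  // { DomainPHIRead[] -> Scatter[] }: timepoints strictly before each read.
  isl_map *BeforeRead = isl_map_read_from_str(
      Ctx, "{ Read[i] -> [t] : 0 <= i < 4 and t < 1 + i }");

  // Restrict to write timepoints, then take the last write per read.
  isl_map *PHIWriteTimes = isl_map_intersect_range(
      BeforeRead, isl_set_from_union_set(isl_union_map_range(
                      isl_union_map_copy(PHIWriteScatter))));
  isl_map *LastPerPHIWrites = isl_map_lexmax(PHIWriteTimes);

  // Map the last write time back to the writing statement instance.
  // Expected: { Read[0] -> Preheader[]; Read[i] -> Latch[i - 1] : 0 < i < 4 }
  isl_union_map *Result = isl_union_map_apply_range(
      isl_union_map_from_map(LastPerPHIWrites),
      isl_union_map_reverse(PHIWriteScatter));
  isl_union_map_dump(Result);

  isl_union_map_free(Result);
  isl_ctx_free(Ctx);
  return 0;
}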
isl::union_set ZoneAlgorithm::makeEmptyUnionSet() const {
@@ -681,6 +766,57 @@ isl::map ZoneAlgorithm::makeValInst(Value *Val, ScopStmt *UserStmt, Loop *Scope,
llvm_unreachable("Unhandled use type");
}
/// Remove all computed PHIs from @p Input and replace them by their incoming
/// values.
///
/// @param Input { [] -> ValInst[] }
/// @param ComputedPHIs Set of PHIs that are replaced. Their ValInsts must
/// appear on the LHS of @p NormalizeMap.
/// @param NormalizeMap { ValInst[] -> ValInst[] }
static isl::union_map normalizeValInst(isl::union_map Input,
const DenseSet<PHINode *> &ComputedPHIs,
isl::union_map NormalizeMap) {
isl::union_map Result = isl::union_map::empty(Input.get_space());
Input.foreach_map(
[&Result, &ComputedPHIs, &NormalizeMap](isl::map Map) -> isl::stat {
isl::space Space = Map.get_space();
isl::space RangeSpace = Space.range();
// Instructions within the SCoP are always wrapped. Non-wrapped tuples
// are therefore invariant in the SCoP and don't need normalization.
if (!RangeSpace.is_wrapping()) {
Result = Result.add_map(Map);
return isl::stat::ok;
}
auto *PHI = dyn_cast<PHINode>(static_cast<Value *>(
RangeSpace.unwrap().get_tuple_id(isl::dim::out).get_user()));
// If no normalization is necessary, then the ValInst stands for itself.
if (!ComputedPHIs.count(PHI)) {
Result = Result.add_map(Map);
return isl::stat::ok;
}
// Otherwise, apply the normalization.
isl::union_map Mapped = isl::union_map(Map).apply_range(NormalizeMap);
Result = Result.unite(Mapped);
NumPHINormalizations++;
return isl::stat::ok;
});
return Result;
}
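// ---------------------------------------------------------------------------
// Illustrative sketch (not part of this patch): the wrapping test used above
// to tell ValInsts of in-SCoP instructions (wrapped [Domain[] -> Value[]]
// range tuples) apart from SCoP-invariant values, shown with the isl C API
// on invented map contents.
#include <stdio.h>
#include <isl/ctx.h>
#include <isl/map.h>
#include <isl/space.h>

int main() {
  isl_ctx *Ctx = isl_ctx_alloc();

  // Candidate for normalization: the range wraps [Stmt[] -> phi[]].
  isl_map *Wrapped =
      isl_map_read_from_str(Ctx, "{ Read[i] -> [Stmt[i] -> phi[]] }");
  // Invariant in the SCoP: plain, non-wrapping range tuple.
  isl_map *Plain = isl_map_read_from_str(Ctx, "{ Read[i] -> Val[] }");

  isl_space *WrappedRange = isl_space_range(isl_map_get_space(Wrapped));
  isl_space *PlainRange = isl_space_range(isl_map_get_space(Plain));

  // Prints "1 0": only the first map would be sent through NormalizeMap.
  printf("%d %d\n", isl_space_is_wrapping(WrappedRange),
         isl_space_is_wrapping(PlainRange));

  isl_space_free(WrappedRange);
  isl_space_free(PlainRange);
  isl_map_free(Wrapped);
  isl_map_free(Plain);
  isl_ctx_free(Ctx);
  return 0;
}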
isl::union_map ZoneAlgorithm::makeNormalizedValInst(llvm::Value *Val,
ScopStmt *UserStmt,
llvm::Loop *Scope,
bool IsCertain) {
isl::map ValInst = makeValInst(Val, UserStmt, Scope, IsCertain);
isl::union_map Normalized =
normalizeValInst(ValInst, ComputedPHIs, NormalizeMap);
return Normalized;
}
bool ZoneAlgorithm::isCompatibleAccess(MemoryAccess *MA) {
if (!MA)
return false;
@@ -690,6 +826,64 @@ bool ZoneAlgorithm::isCompatibleAccess(MemoryAccess *MA) {
return isa<StoreInst>(AccInst) || isa<LoadInst>(AccInst);
}
bool ZoneAlgorithm::isNormalizable(MemoryAccess *MA) {
assert(MA->isRead());
// Exclude ExitPHIs; we assume that a normalizable PHI has a READ
// MemoryAccess.
if (!MA->isOriginalPHIKind())
return false;
// Exclude recursive PHIs, normalizing them would require a transitive
// closure.
auto *PHI = cast<PHINode>(MA->getAccessInstruction());
if (RecursivePHIs.count(PHI))
return false;
// Ensure that each incoming value can be represented by a ValInst[].
// We represent values from statements associated with multiple incoming
// values by the PHI itself, but do not yet handle this case when
// normalizing (especially in isNormalized()).
const ScopArrayInfo *SAI = MA->getOriginalScopArrayInfo();
auto Incomings = S->getPHIIncomings(SAI);
for (MemoryAccess *Incoming : Incomings) {
if (Incoming->getIncoming().size() != 1)
return false;
}
return true;
}
bool ZoneAlgorithm::isNormalized(isl::map Map) {
isl::space Space = Map.get_space();
isl::space RangeSpace = Space.range();
if (!RangeSpace.is_wrapping())
return true;
auto *PHI = dyn_cast<PHINode>(static_cast<Value *>(
RangeSpace.unwrap().get_tuple_id(isl::dim::out).get_user()));
if (!PHI)
return true;
auto *IncomingStmt = static_cast<ScopStmt *>(
RangeSpace.unwrap().get_tuple_id(isl::dim::in).get_user());
MemoryAccess *PHIRead = IncomingStmt->lookupPHIReadOf(PHI);
if (!isNormalizable(PHIRead))
return true;
return false;
}
bool ZoneAlgorithm::isNormalized(isl::union_map UMap) {
auto Result = UMap.foreach_map([this](isl::map Map) -> isl::stat {
if (isNormalized(Map))
return isl::stat::ok;
return isl::stat::error;
});
return Result == isl::stat::ok;
}
void ZoneAlgorithm::computeCommon() {
AllReads = makeEmptyUnionMap();
AllMayWrites = makeEmptyUnionMap();
@@ -697,6 +891,11 @@ void ZoneAlgorithm::computeCommon() {
AllWriteValInst = makeEmptyUnionMap();
AllReadValInst = makeEmptyUnionMap();
// Default to empty, i.e. no normalization/replacement is taking place. Call
// computeNormalizedPHIs() to initialize.
NormalizeMap = makeEmptyUnionMap();
ComputedPHIs.clear();
for (auto &Stmt : *S) {
for (auto *MA : Stmt) {
if (!MA->isLatestArrayKind())
@@ -720,6 +919,97 @@ void ZoneAlgorithm::computeCommon() {
simplify(WriteReachDefZone);
}
void ZoneAlgorithm::computeNormalizedPHIs() {
// Determine which PHIs can reference themselves. They are excluded from
// normalization to avoid problems with transitive closures.
for (ScopStmt &Stmt : *S) {
for (MemoryAccess *MA : Stmt) {
if (!MA->isPHIKind())
continue;
if (!MA->isRead())
continue;
// TODO: Can be more efficient since isRecursivePHI can theoretically
// determine recursiveness for multiple values and/or cache results.
auto *PHI = cast<PHINode>(MA->getAccessInstruction());
if (isRecursivePHI(PHI)) {
NumRecursivePHIs++;
RecursivePHIs.insert(PHI);
}
}
}
// { PHIValInst[] -> IncomingValInst[] }
isl::union_map AllPHIMaps = makeEmptyUnionMap();
// Discover new PHIs and try to normalize them.
DenseSet<PHINode *> AllPHIs;
for (ScopStmt &Stmt : *S) {
for (MemoryAccess *MA : Stmt) {
if (!MA->isOriginalPHIKind())
continue;
if (!MA->isRead())
continue;
if (!isNormalizable(MA))
continue;
auto *PHI = cast<PHINode>(MA->getAccessInstruction());
const ScopArrayInfo *SAI = MA->getOriginalScopArrayInfo();
// { PHIDomain[] -> PHIValInst[] }
isl::map PHIValInst = makeValInst(PHI, &Stmt, Stmt.getSurroundingLoop());
// { IncomingDomain[] -> IncomingValInst[] }
isl::union_map IncomingValInsts = makeEmptyUnionMap();
// Get all incoming values.
for (MemoryAccess *MA : S->getPHIIncomings(SAI)) {
ScopStmt *IncomingStmt = MA->getStatement();
auto Incoming = MA->getIncoming();
assert(Incoming.size() == 1 && "The incoming value must be "
"representable by something other "
"than the PHI itself");
Value *IncomingVal = Incoming[0].second;
// { IncomingDomain[] -> IncomingValInst[] }
isl::map IncomingValInst = makeValInst(
IncomingVal, IncomingStmt, IncomingStmt->getSurroundingLoop());
IncomingValInsts = IncomingValInsts.add_map(IncomingValInst);
}
// Determine which instance of the PHI statement corresponds to which
// incoming value.
// { PHIDomain[] -> IncomingDomain[] }
isl::union_map PerPHI = computePerPHI(SAI);
// { PHIValInst[] -> IncomingValInst[] }
isl::union_map PHIMap =
PerPHI.apply_domain(PHIValInst).apply_range(IncomingValInsts);
assert(!PHIMap.is_single_valued().is_false());
// Resolve transitiveness: The incoming value of the newly discovered PHI
// may reference a previously normalized PHI. At the same time, already
// normalized PHIs may have the new PHI as their incoming value. In the
// end, no PHI may appear on the right-hand side of the normalization map.
PHIMap = normalizeValInst(PHIMap, AllPHIs, AllPHIMaps);
AllPHIs.insert(PHI);
AllPHIMaps = normalizeValInst(AllPHIMaps, AllPHIs, PHIMap);
AllPHIMaps = AllPHIMaps.unite(PHIMap);
NumNormalizablePHIs++;
}
}
simplify(AllPHIMaps);
// Apply the normalization.
ComputedPHIs = AllPHIs;
NormalizeMap = AllPHIMaps;
assert(!NormalizeMap || isNormalized(NormalizeMap));
}
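// ---------------------------------------------------------------------------
// Illustrative sketch (not part of this patch): why normalizeValInst is
// applied in both directions above. Simplified with the isl C API to the case
// where only the newly discovered PHI (%phi2) has an already-normalized PHI
// (%phi1) as its incoming value; all maps are invented for illustration.
#include <isl/ctx.h>
#include <isl/union_map.h>

int main() {
  isl_ctx *Ctx = isl_ctx_alloc();

  // Normalization collected so far: %phi1 stands for %val.
  isl_union_map *AllPHIMaps = isl_union_map_read_from_str(
      Ctx, "{ [S1[i] -> phi1[]] -> [S0[i] -> val[]] }");

  // Newly discovered: %phi2's incoming value is %phi1.
  isl_union_map *PHIMap = isl_union_map_read_from_str(
      Ctx, "{ [S2[i] -> phi2[]] -> [S1[i] -> phi1[]] }");

  // Replace %phi1 on the right-hand side of the new map, then merge.
  PHIMap = isl_union_map_apply_range(PHIMap, isl_union_map_copy(AllPHIMaps));
  AllPHIMaps = isl_union_map_union(AllPHIMaps, PHIMap);

  // Expected: { [S1[i] -> phi1[]] -> [S0[i] -> val[]];
  //             [S2[i] -> phi2[]] -> [S0[i] -> val[]] }
  // No PHI remains on a right-hand side.
  isl_union_map_dump(AllPHIMaps);

  isl_union_map_free(AllPHIMaps);
  isl_ctx_free(Ctx);
  return 0;
}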
void ZoneAlgorithm::printAccesses(llvm::raw_ostream &OS, int Indent) const {
OS.indent(Indent) << "After accesses {\n";
for (auto &Stmt : *S) {


@@ -0,0 +1,154 @@
; RUN: opt %loadPolly -polly-optree-normalize-phi=true -polly-optree -analyze < %s | FileCheck %s -match-full-lines
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define internal fastcc void @kernel_atax([2100 x double]* nocapture readonly %A, double* nocapture readonly %x, double* nocapture %y, double* nocapture %tmp) unnamed_addr #0 {
entry:
br label %entry.split
entry.split: ; preds = %entry
%y15 = bitcast double* %y to i8*
call void @llvm.memset.p0i8.i64(i8* %y15, i8 0, i64 16800, i32 8, i1 false)
br label %for.body3
for.body3: ; preds = %for.inc40, %entry.split
%indvars.iv8 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next9, %for.inc40 ]
%arrayidx5 = getelementptr inbounds double, double* %tmp, i64 %indvars.iv8
store double 0.000000e+00, double* %arrayidx5, align 8, !tbaa !6
br label %for.body8
for.body8: ; preds = %for.body8, %for.body3
%0 = phi double [ 0.000000e+00, %for.body3 ], [ %add, %for.body8 ]
%indvars.iv = phi i64 [ 0, %for.body3 ], [ %indvars.iv.next, %for.body8 ]
%arrayidx14 = getelementptr inbounds [2100 x double], [2100 x double]* %A, i64 %indvars.iv8, i64 %indvars.iv
%1 = load double, double* %arrayidx14, align 8, !tbaa !6
%arrayidx16 = getelementptr inbounds double, double* %x, i64 %indvars.iv
%2 = load double, double* %arrayidx16, align 8, !tbaa !6
%mul = fmul double %1, %2
%add = fadd double %0, %mul
store double %add, double* %arrayidx5, align 8, !tbaa !6
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 2
br i1 %exitcond, label %for.end21, label %for.body8
for.end21: ; preds = %for.body8
br label %for.body24
for.body24: ; preds = %for.body24.for.body24_crit_edge, %for.end21
%3 = phi double [ %add, %for.end21 ], [ %.pre, %for.body24.for.body24_crit_edge ]
%indvars.iv5 = phi i64 [ 0, %for.end21 ], [ %indvars.iv.next6, %for.body24.for.body24_crit_edge ]
%arrayidx26 = getelementptr inbounds double, double* %y, i64 %indvars.iv5
%4 = load double, double* %arrayidx26, align 8, !tbaa !6
%arrayidx30 = getelementptr inbounds [2100 x double], [2100 x double]* %A, i64 %indvars.iv8, i64 %indvars.iv5
%5 = load double, double* %arrayidx30, align 8, !tbaa !6
%mul33 = fmul double %5, %3
%add34 = fadd double %4, %mul33
store double %add34, double* %arrayidx26, align 8, !tbaa !6
%indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1
%exitcond7 = icmp eq i64 %indvars.iv.next6, 2
br i1 %exitcond7, label %for.inc40, label %for.body24.for.body24_crit_edge
for.body24.for.body24_crit_edge: ; preds = %for.body24
%.pre = load double, double* %arrayidx5, align 8, !tbaa !6
br label %for.body24
for.inc40: ; preds = %for.body24
%indvars.iv.next9 = add nuw nsw i64 %indvars.iv8, 1
%exitcond10 = icmp eq i64 %indvars.iv.next9, 2
br i1 %exitcond10, label %for.end42, label %for.body3
for.end42: ; preds = %for.inc40
ret void
}
; Function Attrs: argmemonly nounwind
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) #1
attributes #0 = { noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { argmemonly nounwind }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 6.0.0 (trunk 312565) (llvm/trunk 312564)"}
!2 = !{!3, !3, i64 0}
!3 = !{!"any pointer", !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C/C++ TBAA"}
!6 = !{!7, !7, i64 0}
!7 = !{!"double", !4, i64 0}
; CHECK: Statistics {
; CHECK: Operand trees forwarded: 2
; CHECK: Statements with forwarded operand trees: 2
; CHECK: }
; CHECK-NEXT: After statements {
; CHECK-NEXT: Stmt_for_body3
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_for_body3[i0] -> MemRef_tmp[i0] };
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: { Stmt_for_body3[i0] -> MemRef1__phi[] };
; CHECK-NEXT: Instructions {
; CHECK-NEXT: store double 0.000000e+00, double* %arrayidx5, align 8, !tbaa !2
; CHECK-NEXT: }
; CHECK-NEXT: Stmt_for_body8
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: { Stmt_for_body8[i0, i1] -> MemRef1__phi[] };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: { Stmt_for_body8[i0, i1] -> MemRef1__phi[] };
; CHECK-NEXT: new: { Stmt_for_body8[i0, i1] -> MemRef_tmp[i0] };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_for_body8[i0, i1] -> MemRef_A[i0, i1] };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_for_body8[i0, i1] -> MemRef_x[i1] };
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_for_body8[i0, i1] -> MemRef_tmp[i0] };
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: { Stmt_for_body8[i0, i1] -> MemRef_add[] };
; CHECK-NEXT: Instructions {
; CHECK-NEXT: %0 = phi double [ 0.000000e+00, %for.body3 ], [ %add, %for.body8 ]
; CHECK-NEXT: %1 = load double, double* %arrayidx14, align 8, !tbaa !2
; CHECK-NEXT: %2 = load double, double* %arrayidx16, align 8, !tbaa !2
; CHECK-NEXT: %mul = fmul double %1, %2
; CHECK-NEXT: %add = fadd double %0, %mul
; CHECK-NEXT: store double %add, double* %arrayidx5, align 8, !tbaa !2
; CHECK-NEXT: %exitcond = icmp eq i64 %indvars.iv.next, 2
; CHECK-NEXT: }
; CHECK-NEXT: Stmt_for_end21
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: { Stmt_for_end21[i0] -> MemRef_add[] };
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: { Stmt_for_end21[i0] -> MemRef5__phi[] };
; CHECK-NEXT: Instructions {
; CHECK-NEXT: }
; CHECK-NEXT: Stmt_for_body24
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: { Stmt_for_body24[i0, i1] -> MemRef5__phi[] };
; CHECK-NEXT: new: { Stmt_for_body24[i0, i1] -> MemRef_tmp[i0] };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_for_body24[i0, i1] -> MemRef_y[i1] };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_for_body24[i0, i1] -> MemRef_A[i0, i1] };
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_for_body24[i0, i1] -> MemRef_y[i1] };
; CHECK-NEXT: Instructions {
; CHECK-NEXT: %3 = phi double [ %add, %for.end21 ], [ %.pre, %for.body24.for.body24_crit_edge ]
; CHECK-NEXT: %4 = load double, double* %arrayidx26, align 8, !tbaa !2
; CHECK-NEXT: %5 = load double, double* %arrayidx30, align 8, !tbaa !2
; CHECK-NEXT: %mul33 = fmul double %5, %3
; CHECK-NEXT: %add34 = fadd double %4, %mul33
; CHECK-NEXT: store double %add34, double* %arrayidx26, align 8, !tbaa !2
; CHECK-NEXT: %exitcond7 = icmp eq i64 %indvars.iv.next6, 2
; CHECK-NEXT: }
; CHECK-NEXT: Stmt_for_body24_for_body24_crit_edge
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: { Stmt_for_body24_for_body24_crit_edge[i0, i1] -> MemRef5__phi[] };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_for_body24_for_body24_crit_edge[i0, i1] -> MemRef_tmp[i0] };
; CHECK-NEXT: Instructions {
; CHECK-NEXT: %.pre = load double, double* %arrayidx5, align 8, !tbaa !2
; CHECK-NEXT: }
; CHECK-NEXT: }


@@ -0,0 +1,82 @@
; RUN: opt %loadPolly -polly-optree-normalize-phi=true -polly-optree -analyze < %s | FileCheck %s -match-full-lines
;
; Rematerialize a load.
;
; for (int j = 0; j < n; j += 1) {
; bodyA:
; double val = B[j];
;
; bodyB:
; double phi = val;
;
; bodyC:
; A[j] = phi;
; }
;
define void @func(i32 %n, double* noalias nonnull %A, double* noalias nonnull %B) {
entry:
br label %for
for:
%j = phi i32 [0, %entry], [%j.inc, %inc]
%j.cmp = icmp slt i32 %j, %n
br i1 %j.cmp, label %bodyA, label %exit
bodyA:
%B_idx = getelementptr inbounds double, double* %B, i32 %j
%val = load double, double* %B_idx
br label %bodyB
bodyB:
%phi = phi double [%val, %bodyA]
br label %bodyC
bodyC:
%A_idx = getelementptr inbounds double, double* %A, i32 %j
store double %phi, double* %A_idx
br label %inc
inc:
%j.inc = add nuw nsw i32 %j, 1
br label %for
exit:
br label %return
return:
ret void
}
; CHECK: Statistics {
; CHECK: Reloads: 2
; CHECK: }
; CHECK-NEXT: After statements {
; CHECK-NEXT: Stmt_bodyA
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_B[i0] };
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: [n] -> { Stmt_bodyA[i0] -> MemRef_phi__phi[] };
; CHECK-NEXT: Instructions {
; CHECK-NEXT: %val = load double, double* %B_idx
; CHECK-NEXT: }
; CHECK-NEXT: Stmt_bodyB
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: [n] -> { Stmt_bodyB[i0] -> MemRef_phi__phi[] };
; CHECK-NEXT: new: [n] -> { Stmt_bodyB[i0] -> MemRef_B[i0] };
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: [n] -> { Stmt_bodyB[i0] -> MemRef_phi[] };
; CHECK-NEXT: Instructions {
; CHECK-NEXT: %phi = phi double [ %val, %bodyA ]
; CHECK-NEXT: }
; CHECK-NEXT: Stmt_bodyC
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [n] -> { Stmt_bodyC[i0] -> MemRef_A[i0] };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: [n] -> { Stmt_bodyC[i0] -> MemRef_phi[] };
; CHECK-NEXT: new: [n] -> { Stmt_bodyC[i0] -> MemRef_B[i0] };
; CHECK-NEXT: Instructions {
; CHECK-NEXT: store double %phi, double* %A_idx
; CHECK-NEXT: }
; CHECK-NEXT: }


@@ -0,0 +1,108 @@
; RUN: opt %loadPolly -polly-optree-normalize-phi=true -polly-optree -analyze < %s | FileCheck %s -match-full-lines
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define internal fastcc void @kernel_jacobi_1d(double* noalias nocapture %A, double* noalias nocapture %B) unnamed_addr #0 {
entry:
br label %entry.split
entry.split: ; preds = %entry
%arrayidx6.phi.trans.insert = getelementptr inbounds double, double* %A, i64 1
%arrayidx21.phi.trans.insert = getelementptr inbounds double, double* %B, i64 1
br label %for.body
for.body: ; preds = %for.inc33, %entry.split
%t.03 = phi i32 [ 0, %entry.split ], [ %inc34, %for.inc33 ]
%.pre = load double, double* %A, align 8, !tbaa !6
%.pre10 = load double, double* %arrayidx6.phi.trans.insert, align 8, !tbaa !6
br label %for.body3
for.body3: ; preds = %for.body3, %for.body
%0 = phi double [ %.pre10, %for.body ], [ %2, %for.body3 ]
%1 = phi double [ %.pre, %for.body ], [ %0, %for.body3 ]
%indvars.iv = phi i64 [ 1, %for.body ], [ %indvars.iv.next, %for.body3 ]
%add = fadd double %1, %0
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%arrayidx9 = getelementptr inbounds double, double* %A, i64 %indvars.iv.next
%2 = load double, double* %arrayidx9, align 8, !tbaa !6
%add10 = fadd double %add, %2
%mul = fmul double %add10, 3.333300e-01
%arrayidx12 = getelementptr inbounds double, double* %B, i64 %indvars.iv
store double %mul, double* %arrayidx12, align 8, !tbaa !6
%exitcond = icmp eq i64 %indvars.iv.next, 3
br i1 %exitcond, label %for.end, label %for.body3
for.end: ; preds = %for.body3
%.pre11 = load double, double* %B, align 8, !tbaa !6
%.pre12 = load double, double* %arrayidx21.phi.trans.insert, align 8, !tbaa !6
br label %for.inc33
for.inc33: ; preds = %for.body16
%inc34 = add nuw nsw i32 %t.03, 1
%exitcond9 = icmp eq i32 %inc34, 2
br i1 %exitcond9, label %for.end35, label %for.body
for.end35: ; preds = %for.inc33
ret void
}
attributes #0 = { noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 6.0.0 (llvm/trunk 312874)"}
!2 = !{!3, !3, i64 0}
!3 = !{!"any pointer", !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C/C++ TBAA"}
!6 = !{!7, !7, i64 0}
!7 = !{!"double", !4, i64 0}
; CHECK: Statistics {
; CHECK: Operand trees forwarded: 2
; CHECK: Statements with forwarded operand trees: 1
; CHECK: }
; CHECK-NEXT: After statements {
; CHECK-NEXT: Stmt_for_body
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_for_body[i0] -> MemRef_A[0] };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_for_body[i0] -> MemRef_A[1] };
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: { Stmt_for_body[i0] -> MemRef1__phi[] };
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: { Stmt_for_body[i0] -> MemRef2__phi[] };
; CHECK-NEXT: Instructions {
; CHECK-NEXT: %.pre = load double, double* %A, align 8, !tbaa !2
; CHECK-NEXT: %.pre10 = load double, double* %arrayidx6.phi.trans.insert, align 8, !tbaa !2
; CHECK-NEXT: }
; CHECK-NEXT: Stmt_for_body3
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: { Stmt_for_body3[i0, i1] -> MemRef1__phi[] };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: { Stmt_for_body3[i0, i1] -> MemRef1__phi[] };
; CHECK-NEXT: new: { Stmt_for_body3[i0, i1] -> MemRef_A[1 + i1] };
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: { Stmt_for_body3[i0, i1] -> MemRef2__phi[] };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 1]
; CHECK-NEXT: { Stmt_for_body3[i0, i1] -> MemRef2__phi[] };
; CHECK-NEXT: new: { Stmt_for_body3[i0, i1] -> MemRef_A[i1] };
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_for_body3[i0, i1] -> MemRef_A[2 + i1] };
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: { Stmt_for_body3[i0, i1] -> MemRef_B[1 + i1] };
; CHECK-NEXT: Instructions {
; CHECK-NEXT: %0 = phi double [ %.pre10, %for.body ], [ %2, %for.body3 ]
; CHECK-NEXT: %1 = phi double [ %.pre, %for.body ], [ %0, %for.body3 ]
; CHECK-NEXT: %add = fadd double %1, %0
; CHECK-NEXT: %2 = load double, double* %arrayidx9, align 8, !tbaa !2
; CHECK-NEXT: %add10 = fadd double %add, %2
; CHECK-NEXT: %mul = fmul double %add10, 3.333300e-01
; CHECK-NEXT: store double %mul, double* %arrayidx12, align 8, !tbaa !2
; CHECK-NEXT: %exitcond = icmp eq i64 %indvars.iv.next, 3
; CHECK-NEXT: }
; CHECK-NEXT: }


@@ -0,0 +1,49 @@
; RUN: opt %loadPolly -polly-optree-normalize-phi=true -polly-optree -analyze < %s | FileCheck %s -match-full-lines
;
; Contains a self-referencing PHINode that would require a
; transitive closure to handle.
;
; for (int j = 0; j < n; j += 1) {
; double phi = 0.0;
; for (int i = 0; i < m; i += 1)
; phi = phi;
; A[j] = phi;
; }
;
define void @func(i32 %n, i32 %m, double* noalias nonnull %A) {
entry:
br label %for
for:
%j = phi i32 [0, %entry], [%j.inc, %inc]
%j.cmp = icmp slt i32 %j, %n
br i1 %j.cmp, label %for.preheader, label %exit
for.preheader:
br label %for.inner
for.inner:
%i = phi i32 [0, %for.preheader], [%i.inc, %for.inner]
%phi = phi double [0.0, %for.preheader], [%phi, %for.inner]
%i.inc = add nuw nsw i32 %i, 1
%i.cmp = icmp slt i32 %i.inc, %m
br i1 %i.cmp, label %for.inner, label %for.exit
for.exit:
%A_idx = getelementptr inbounds double, double* %A, i32 %j
store double %phi, double* %A_idx
br label %inc
inc:
%j.inc = add nuw nsw i32 %j, 1
br label %for
exit:
br label %return
return:
ret void
}
; CHECK: ForwardOpTree executed, but did not modify anything