From 9ad45adfd76a17d4631f491e5ad33cb1e312c256 Mon Sep 17 00:00:00 2001
From: Anna Thomas
Date: Fri, 8 Jul 2016 22:15:08 +0000
Subject: [PATCH] Revert "InstCombine rule to fold truncs whose value is available"

This reverts commit r274853. Caused failure in ppcBE build

llvm-svn: 274943
---
 llvm/include/llvm/Analysis/Loads.h         |  38 -----
 llvm/lib/Analysis/Loads.cpp                |  43 +++---
 .../InstCombine/InstCombineCasts.cpp       |  24 +---
 llvm/test/Transforms/InstCombine/trunc.ll  | 130 ------------------
 4 files changed, 17 insertions(+), 218 deletions(-)

diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h
index 608e412eb667..39f80f489e12 100644
--- a/llvm/include/llvm/Analysis/Loads.h
+++ b/llvm/include/llvm/Analysis/Loads.h
@@ -58,44 +58,6 @@ bool isSafeToLoadUnconditionally(Value *V, unsigned Align,
 /// to scan in the block, used by FindAvailableLoadedValue().
 extern cl::opt<unsigned> DefMaxInstsToScan;
 
-/// \brief Scan backwards to see if we have the value of type \p AccessTy
-/// at the memory address \p Ptr locally available within a
-/// small number of instructions. If the value is available, return it.
-///
-/// You can use this function to scan across multiple blocks: after you call
-/// this function, if ScanFrom points at the beginning of the block, it's safe
-/// to continue scanning the predecessors.
-/// Note that we assume the \p *Ptr is accessed through a non-volatile but
-/// potentially atomic load. Any other constraints should be verified at the
-/// caller.
-///
-/// \param Ptr The memory location whose contents we are retrieving
-/// \param AccessTy The type (and size) of the contents we need from \p Ptr
-/// \param IsAtomicMemOp specifies the atomicity of the memory operation that accesses
-/// \p *Ptr. We verify atomicity constraints are satisfied when value forwarding
-/// from another memory operation that has value \p *Ptr available.
-/// \param ScanBB The basic block to scan. FIXME: This is redundant.
-/// \param [in,out] ScanFrom The location to start scanning from. When this
-/// function returns, it points at the last instruction scanned.
-/// \param MaxInstsToScan The maximum number of instructions to scan. If this
-/// is zero, the whole block will be scanned.
-/// \param AA Optional pointer to alias analysis, to make the scan more
-/// precise.
-/// \param [out] AATags The aliasing metadata for the operation which produced
-/// the value. FIXME: This is basically useless.
-///
-/// \param [out] IsLoadCSE Whether the returned value is a load from the same
-/// location in memory, as opposed to the value operand of a store.
-///
-/// \returns The found value, or nullptr if no value is found.
-Value *FindAvailableLoadedValue(Value *Ptr, Type *AccessTy, bool IsAtomicMemOp,
-                                BasicBlock *ScanBB,
-                                BasicBlock::iterator &ScanFrom,
-                                unsigned MaxInstsToScan,
-                                AliasAnalysis *AA = nullptr,
-                                AAMDNodes *AATags = nullptr,
-                                bool *IsLoadCSE = nullptr);
-
 /// \brief Scan backwards to see if we have the value of the given load
 /// available locally within a small number of instructions.
 ///
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index 1a2f8e76c878..309a1d95efb6 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -297,17 +297,27 @@ llvm::DefMaxInstsToScan("available-load-scan-limit", cl::init(6), cl::Hidden,
            "to scan backward from a given instruction, when searching for "
            "available loaded value"));
 
-
-Value *llvm::FindAvailableLoadedValue(Value *Ptr, Type *AccessTy,
-                                      bool IsAtomicMemOp, BasicBlock *ScanBB,
+Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
                                       BasicBlock::iterator &ScanFrom,
                                       unsigned MaxInstsToScan,
                                       AliasAnalysis *AA, AAMDNodes *AATags,
                                       bool *IsLoadCSE) {
-
   if (MaxInstsToScan == 0)
     MaxInstsToScan = ~0U;
+
+  Value *Ptr = Load->getPointerOperand();
+  Type *AccessTy = Load->getType();
+
+  // We can never remove a volatile load
+  if (Load->isVolatile())
+    return nullptr;
+
+  // Anything stronger than unordered is currently unimplemented.
+  if (!Load->isUnordered())
+    return nullptr;
+
   const DataLayout &DL = ScanBB->getModule()->getDataLayout();
+
   // Try to get the store size for the type.
   uint64_t AccessSize = DL.getTypeStoreSize(AccessTy);
 
@@ -338,7 +348,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, Type *AccessTy,
 
       // We can value forward from an atomic to a non-atomic, but not the
       // other way around.
-      if (LI->isAtomic() < IsAtomicMemOp)
+      if (LI->isAtomic() < Load->isAtomic())
         return nullptr;
 
       if (AATags)
@@ -359,7 +369,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, Type *AccessTy,
 
       // We can value forward from an atomic to a non-atomic, but not the
       // other way around.
-      if (SI->isAtomic() < IsAtomicMemOp)
+      if (SI->isAtomic() < Load->isAtomic())
         return nullptr;
 
       if (AATags)
@@ -403,24 +413,3 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, Type *AccessTy,
   // block.
   return nullptr;
 }
-
-
-Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
-                                      BasicBlock::iterator &ScanFrom,
-                                      unsigned MaxInstsToScan,
-                                      AliasAnalysis *AA, AAMDNodes *AATags,
-                                      bool *IsLoadCSE) {
-
-  // We can never remove a volatile load
-  if (Load->isVolatile())
-    return nullptr;
-
-  // Anything stronger than unordered is currently unimplemented.
-  if (!Load->isUnordered())
-    return nullptr;
-
-  // Return the full value of the load if available.
-  return FindAvailableLoadedValue(Load->getPointerOperand(), Load->getType(),
-                                  Load->isAtomic(), ScanBB, ScanFrom,
-                                  MaxInstsToScan, AA, AATags, IsLoadCSE);
-}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 395e83661d57..20556157188f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -13,10 +13,9 @@
 
 #include "InstCombineInternal.h"
 #include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/Loads.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 using namespace llvm;
 using namespace PatternMatch;
 
@@ -576,27 +575,6 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
   if (Instruction *I = foldVecTruncToExtElt(CI, *this, DL))
     return I;
 
-  // When trunc operand is a widened load, see if we can get the value from a
-  // previous store/load
-  if (auto *LI = dyn_cast<LoadInst>(Src)) {
-    BasicBlock::iterator BBI(*LI);
-
-    // Scan a few instructions up from LI and if we find a partial load/store
-    // of Type DestTy that feeds into LI, we can replace all uses of the trunc
-    // with the load/store value.
-    // This replacement can be done only in the case of non-volatile loads, with
-    // ordering at most unordered. If the load is atomic, its only use should be
-    // the trunc instruction. We don't want to allow other users of LI to see a
-    // value that is out of sync with the value we're folding the trunc to (in
-    // case of a race).
-    if (LI->isUnordered() && (!LI->isAtomic() || LI->hasOneUse()))
-      if (Value *AvailableVal = FindAvailableLoadedValue(
-              LI->getPointerOperand(), DestTy, LI->isAtomic(), LI->getParent(),
-              BBI, DefMaxInstsToScan))
-        return replaceInstUsesWith(
-            CI, Builder->CreateBitOrPointerCast(AvailableVal, CI.getType(),
-                                                CI.getName() + ".cast"));
-  }
   return nullptr;
 }
 
diff --git a/llvm/test/Transforms/InstCombine/trunc.ll b/llvm/test/Transforms/InstCombine/trunc.ll
index 4774827dc28a..2019b3a6df47 100644
--- a/llvm/test/Transforms/InstCombine/trunc.ll
+++ b/llvm/test/Transforms/InstCombine/trunc.ll
@@ -181,133 +181,3 @@ bb1:
 bb2:
   unreachable
 }
-
-
-; the trunc can be replaced from value available from store
-; load feeding into trunc left as-is.
-declare void @consume(i8) readonly
-define i1 @trunc_load_store(i8* align 2 %a) {
-  store i8 0, i8 *%a, align 2
-  %bca = bitcast i8* %a to i16*
-  %wide.load = load i16, i16* %bca, align 2
-  %lowhalf.1 = trunc i16 %wide.load to i8
-  call void @consume(i8 %lowhalf.1)
-  %cmp.2 = icmp ult i16 %wide.load, 256
-  ret i1 %cmp.2
-; CHECK-LABEL: @trunc_load_store
-; CHECK: %wide.load = load i16, i16* %bca, align 2
-; CHECK-NOT: trunc
-; CHECK: call void @consume(i8 0)
-}
-
-
-; The trunc can be replaced with the load value.
-; both loads left as-is, since they have uses.
-define i1 @trunc_load_load(i8* align 2 %a) {
-  %pload = load i8, i8* %a, align 2
-  %bca = bitcast i8* %a to i16*
-  %wide.load = load i16, i16* %bca, align 2
-  %lowhalf = trunc i16 %wide.load to i8
-  call void @consume(i8 %lowhalf)
-  call void @consume(i8 %pload)
-  %cmp.2 = icmp ult i16 %wide.load, 256
-  ret i1 %cmp.2
-; CHECK-LABEL: @trunc_load_load
-; CHECK-NEXT: %pload = load i8, i8* %a, align 2
-; CHECK-NEXT: %bca = bitcast i8* %a to i16*
-; CHECK-NEXT: %wide.load = load i16, i16* %bca, align 2
-; CHECK-NEXT: call void @consume(i8 %pload)
-; CHECK-NEXT: call void @consume(i8 %pload)
-; CHECK-NEXT: %cmp.2 = icmp ult i16 %wide.load, 256
-}
-
-; Store and load to same memory location address generated through GEP.
-; trunc can be removed by using the store value.
-define void @trunc_with_gep_memaccess(i16* align 2 %p) {
-  %t0 = getelementptr i16, i16* %p, i32 1
-  store i16 2, i16* %t0
-  %t1 = getelementptr i16, i16* %p, i32 1
-  %x = load i16, i16* %t1
-  %lowhalf = trunc i16 %x to i8
-  call void @consume(i8 %lowhalf)
-  ret void
-; CHECK-LABEL: @trunc_with_gep_memaccess
-; CHECK-NOT: trunc
-; CHECK: call void @consume(i8 2)
-}
-
-; trunc should not be replaced since atomic load %wide.load has more than one use.
-; different values can be seen by the uses of %wide.load in case of race.
-define i1 @trunc_atomic_loads(i8* align 2 %a) {
-  %pload = load atomic i8, i8* %a unordered, align 2
-  %bca = bitcast i8* %a to i16*
-  %wide.load = load atomic i16, i16* %bca unordered, align 2
-  %lowhalf = trunc i16 %wide.load to i8
-  call void @consume(i8 %lowhalf)
-  call void @consume(i8 %pload)
-  %cmp.2 = icmp ult i16 %wide.load, 256
-  ret i1 %cmp.2
-; CHECK-LABEL: @trunc_atomic_loads
-; CHECK: trunc
-}
-
-; trunc can be replaced since atomic load has single use.
-; atomic load is also removed since use is removed.
-define void @trunc_atomic_single_load(i8* align 2 %a) {
-  %pload = load atomic i8, i8* %a unordered, align 2
-  %bca = bitcast i8* %a to i16*
-  %wide.load = load atomic i16, i16* %bca unordered, align 2
-  %lowhalf = trunc i16 %wide.load to i8
-  call void @consume(i8 %lowhalf)
-  call void @consume(i8 %pload)
-  ret void
-; CHECK-LABEL: @trunc_atomic_single_load
-; CHECK-NOT: trunc
-; CHECK-NOT: %wide.load = load atomic i16, i16* %bca unordered, align 2
-}
-
-
-; trunc cannot be replaced since load's atomic ordering is higher than unordered
-define void @trunc_atomic_monotonic(i8* align 2 %a) {
-  %pload = load atomic i8, i8* %a monotonic, align 2
-  %bca = bitcast i8* %a to i16*
-  %wide.load = load atomic i16, i16* %bca monotonic, align 2
-  %lowhalf = trunc i16 %wide.load to i8
-  call void @consume(i8 %lowhalf)
-  call void @consume(i8 %pload)
-  ret void
-; CHECK-LABEL: @trunc_atomic_monotonic
-; CHECK: %wide.load = load atomic i16, i16* %bca monotonic, align 2
-; CHECK: trunc
-}
-
-; trunc cannot be replaced since store size (i16) is not trunc result size (i8).
-; FIXME: we could get the i8 content of trunc from the i16 store value.
-define i1 @trunc_different_size_load(i16 * align 2 %a) {
-  store i16 0, i16 *%a, align 2
-  %bca = bitcast i16* %a to i32*
-  %wide.load = load i32, i32* %bca, align 2
-  %lowhalf = trunc i32 %wide.load to i8
-  call void @consume(i8 %lowhalf)
-  %cmp.2 = icmp ult i32 %wide.load, 256
-  ret i1 %cmp.2
-; CHECK-LABEL: @trunc_different_size_load
-; CHECK: %lowhalf = trunc i32 %wide.load to i8
-}
-
-declare void @consume_f(float) readonly
-; bitcast required since trunc result type and %fload are different types.
-; so replace the trunc with bitcast.
-define i1 @trunc_avoid_bitcast(float* %b) {
-  %fload = load float, float* %b
-  %bca = bitcast float* %b to i64*
-  %iload = load i64, i64* %bca
-  %low32 = trunc i64 %iload to i32
-  call void @consume_f(float %fload)
-  %cmp.2 = icmp ult i32 %low32, 256
-  ret i1 %cmp.2
-; CHECK-LABEL: @trunc_avoid_bitcast
-; CHECK-NOT: %low32 = trunc i64 %iload to i32
-; CHECK: %low32.cast = bitcast float %fload to i32
-; CHECK: %cmp.2 = icmp ult i32 %low32.cast, 256
-}
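
For reference, a minimal sketch of the fold this patch reverts (r274853), mirroring the deleted @trunc_load_store test above. The function name @example is illustrative and not taken from the patch. When the narrow value is already available from an earlier store (or load) at the same address, the pre-revert InstCombine rule replaced the trunc of the widened load with that value, leaving the wide load in place for its remaining uses:

declare void @consume(i8) readonly

; Input: an i8 store makes the low-half value available; the wider i16 load
; is then truncated back to i8.
define i1 @example(i8* align 2 %a) {
  store i8 0, i8* %a, align 2
  %bca = bitcast i8* %a to i16*
  %wide.load = load i16, i16* %bca, align 2
  %lowhalf = trunc i16 %wide.load to i8
  call void @consume(i8 %lowhalf)
  %cmp = icmp ult i16 %wide.load, 256
  ret i1 %cmp
}

; With the reverted rule, -instcombine rewrote the body as the deleted CHECK
; lines describe: the trunc is gone, its use sees the stored value directly,
; and %wide.load survives for the icmp:
;   store i8 0, i8* %a, align 2
;   %bca = bitcast i8* %a to i16*
;   %wide.load = load i16, i16* %bca, align 2
;   call void @consume(i8 0)
;   %cmp = icmp ult i16 %wide.load, 256
;   ret i1 %cmp

Note that the trunc extracts the low half of %wide.load, and which byte that is depends on target endianness; this is presumably related to the ppcBE buildbot failure cited in the commit message.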