Introduce llvm.load.relative intrinsic.

This intrinsic takes two arguments, ``%ptr`` and ``%offset``. It loads
a 32-bit value from the address ``%ptr + %offset``, adds ``%ptr`` to that
value and returns it. The constant folder specifically recognizes the form of
this intrinsic and the constant initializers it may load from; if a loaded
constant initializer is known to have the form ``i32 trunc(x - %ptr)``,
the intrinsic call is folded to ``x``.

LLVM provides that the calculation of such a constant initializer will
not overflow at link time under the medium code model if ``x`` is an
``unnamed_addr`` function. However, it does not provide this guarantee for
a constant initializer folded into a function body. This intrinsic can be
used to avoid the possibility of overflows when loading from such a constant.

Differential Revision: http://reviews.llvm.org/D18367

llvm-svn: 267223
This commit is contained in:
Peter Collingbourne 2016-04-22 21:18:02 +00:00
parent b29465fe47
commit 7dd8dbf486
15 changed files with 311 additions and 1 deletions

View File

@ -12361,6 +12361,31 @@ if"); and this allows for "check widening" type optimizations.
``@llvm.experimental.guard`` cannot be invoked.
'``llvm.load.relative``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
::
declare i8* @llvm.load.relative.iN(i8* %ptr, iN %offset) argmemonly nounwind readonly
Overview:
"""""""""
This intrinsic loads a 32-bit value from the address ``%ptr + %offset``,
adds ``%ptr`` to that value and returns it. The constant folder specifically
recognizes the form of this intrinsic and the constant initializers it may
load from; if a loaded constant initializer is known to have the form
``i32 trunc(x - %ptr)``, the intrinsic call is folded to ``x``.
LLVM provides that the calculation of such a constant initializer will
not overflow at link time under the medium code model if ``x`` is an
``unnamed_addr`` function. However, it does not provide this guarantee for
a constant initializer folded into a function body. This intrinsic can be
used to avoid the possibility of overflows when loading from such a constant.
Stack Map Intrinsics
--------------------

View File

@ -680,6 +680,11 @@ namespace llvm {
///
ModulePass *createLowerEmuTLSPass(const TargetMachine *TM);
/// This pass lowers the @llvm.load.relative intrinsic to instructions.
/// This is unsafe to do earlier because a pass may combine the constant
/// initializer into the load, which may result in an overflowing evaluation.
ModulePass *createPreISelIntrinsicLoweringPass();
/// GlobalMerge - This pass merges internal (by default) globals into structs
/// to enable reuse of a base pointer by indexed addressing modes.
/// It can also be configured to focus on size optimizations only.

View File

@ -669,6 +669,9 @@ def int_masked_scatter: Intrinsic<[],
def int_bitset_test : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_metadata_ty],
[IntrNoMem]>;
def int_load_relative: Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_anyint_ty],
[IntrReadMem, IntrArgMemOnly]>;
//===----------------------------------------------------------------------===//
// Target-specific intrinsics
//===----------------------------------------------------------------------===//

View File

@ -246,6 +246,7 @@ void initializePostOrderFunctionAttrsLegacyPassPass(PassRegistry&);
void initializePostRAHazardRecognizerPass(PassRegistry&);
void initializePostRASchedulerPass(PassRegistry&);
void initializePostMachineSchedulerPass(PassRegistry&);
void initializePreISelIntrinsicLoweringPass(PassRegistry&);
void initializePrintFunctionPassWrapperPass(PassRegistry&);
void initializePrintModulePassWrapperPass(PassRegistry&);
void initializePrintBasicBlockPassPass(PassRegistry&);

View File

@ -900,6 +900,11 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
default:
return Base::visitCallSite(CS);
case Intrinsic::load_relative:
// This is normally lowered to 4 LLVM instructions.
Cost += 3 * InlineConstants::InstrCost;
return false;
case Intrinsic::memset:
case Intrinsic::memcpy:
case Intrinsic::memmove:

View File

@ -3825,6 +3825,62 @@ static bool IsIdempotent(Intrinsic::ID ID) {
}
}
/// Try to fold a call to llvm.load.relative whose pointer and offset operands
/// are both constants.
///
/// \p Ptr must be recognized by IsConstantOffsetFromGlobal, and \p Offset must
/// be a ConstantInt of at most 64 bits whose value is a multiple of 4. The
/// i32 stored at Ptr + Offset is constant folded; if the folded value has the
/// form `sub (ptrtoint X), Base`, optionally wrapped in a trunc, where Base
/// resolves to the same global and offset as \p Ptr, the result is X bitcast
/// to i8*.
///
/// \returns the folded constant, or nullptr if any step fails to match.
static Value *SimplifyRelativeLoad(Constant *Ptr, Constant *Offset,
                                   const DataLayout &DL) {
  GlobalValue *PtrSym;
  APInt PtrOffset;
  if (!IsConstantOffsetFromGlobal(Ptr, PtrSym, PtrOffset, DL))
    return nullptr;

  Type *Int8PtrTy = Type::getInt8PtrTy(Ptr->getContext());
  Type *Int32Ty = Type::getInt32Ty(Ptr->getContext());
  Type *Int32PtrTy = Int32Ty->getPointerTo();
  Type *Int64Ty = Type::getInt64Ty(Ptr->getContext());

  // getSExtValue() requires the value to fit in 64 bits, so reject anything
  // wider before calling it.
  auto *OffsetConstInt = dyn_cast<ConstantInt>(Offset);
  if (!OffsetConstInt || OffsetConstInt->getType()->getBitWidth() > 64)
    return nullptr;

  // Keep the sign-extended offset in a signed variable so that a negative
  // offset produces a negative element index below, rather than a huge
  // unsigned quotient that is only correct after address wraparound.
  int64_t OffsetInt = OffsetConstInt->getSExtValue();
  if (OffsetInt % 4 != 0)
    return nullptr;

  // Address the slot as an i32 element relative to Ptr and fold the load.
  Constant *C = ConstantExpr::getGetElementPtr(
      Int32Ty, ConstantExpr::getBitCast(Ptr, Int32PtrTy),
      ConstantInt::get(Int64Ty, OffsetInt / 4, /*isSigned=*/true));
  Constant *Loaded = ConstantFoldLoadFromConstPtr(C, Int32Ty, DL);
  if (!Loaded)
    return nullptr;

  auto *LoadedCE = dyn_cast<ConstantExpr>(Loaded);
  if (!LoadedCE)
    return nullptr;

  // Look through an optional trunc around the subtraction.
  if (LoadedCE->getOpcode() == Instruction::Trunc) {
    LoadedCE = dyn_cast<ConstantExpr>(LoadedCE->getOperand(0));
    if (!LoadedCE)
      return nullptr;
  }

  if (LoadedCE->getOpcode() != Instruction::Sub)
    return nullptr;

  // The minuend must be `ptrtoint X`; X is the candidate result.
  auto *LoadedLHS = dyn_cast<ConstantExpr>(LoadedCE->getOperand(0));
  if (!LoadedLHS || LoadedLHS->getOpcode() != Instruction::PtrToInt)
    return nullptr;
  auto *LoadedLHSPtr = LoadedLHS->getOperand(0);

  // The subtrahend must refer to exactly the same global and offset as Ptr,
  // otherwise adding Ptr back would not cancel it out.
  Constant *LoadedRHS = LoadedCE->getOperand(1);
  GlobalValue *LoadedRHSSym;
  APInt LoadedRHSOffset;
  if (!IsConstantOffsetFromGlobal(LoadedRHS, LoadedRHSSym, LoadedRHSOffset,
                                  DL) ||
      PtrSym != LoadedRHSSym || PtrOffset != LoadedRHSOffset)
    return nullptr;

  return ConstantExpr::getBitCast(LoadedLHSPtr, Int8PtrTy);
}
template <typename IterTy>
static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
const Query &Q, unsigned MaxRecurse) {
@ -3865,6 +3921,11 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd,
if (match(RHS, m_Undef()))
return Constant::getNullValue(ReturnType);
}
if (IID == Intrinsic::load_relative && isa<Constant>(LHS) &&
isa<Constant>(RHS))
return SimplifyRelativeLoad(cast<Constant>(LHS), cast<Constant>(RHS),
Q.DL);
}
// Perform idempotent optimizations

View File

@ -87,6 +87,7 @@ add_llvm_library(LLVMCodeGen
PHIEliminationUtils.cpp
PostRAHazardRecognizer.cpp
PostRASchedulerList.cpp
PreISelIntrinsicLowering.cpp
ProcessImplicitDefs.cpp
PrologEpilogInserter.cpp
PseudoSourceValue.cpp

View File

@ -63,6 +63,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializePostMachineSchedulerPass(Registry);
initializePostRAHazardRecognizerPass(Registry);
initializePostRASchedulerPass(Registry);
initializePreISelIntrinsicLoweringPass(Registry);
initializeProcessImplicitDefsPass(Registry);
initializeRegisterCoalescerPass(Registry);
initializeShrinkWrapPass(Registry);

View File

@ -102,6 +102,8 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
if (TM->Options.EmulatedTLS)
PM.add(createLowerEmuTLSPass(TM));
PM.add(createPreISelIntrinsicLoweringPass());
// Add internal analysis passes from the target machine.
PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));

View File

@ -0,0 +1,85 @@
//===-- PreISelIntrinsicLowering.cpp - Pre-ISel intrinsic lowering pass ---===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass implements IR lowering for the llvm.load.relative intrinsic.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
using namespace llvm;
namespace {
/// Expand every direct call to the llvm.load.relative declaration \p F into
/// an i8 GEP, a bitcast to i32*, a 4-byte-aligned i32 load and a second i8
/// GEP off the original pointer operand.
///
/// \returns true if at least one call was replaced.
bool lowerLoadRelative(Function &F) {
  if (F.use_empty())
    return false;

  auto &Ctx = F.getContext();
  Type *I8Ty = Type::getInt8Ty(Ctx);
  Type *I32Ty = Type::getInt32Ty(Ctx);
  Type *I32PtrTy = I32Ty->getPointerTo();

  bool Lowered = false;
  auto UI = F.use_begin();
  auto UE = F.use_end();
  while (UI != UE) {
    auto *Call = dyn_cast<CallInst>(UI->getUser());
    // Step past this use before the call is possibly erased below.
    ++UI;

    // Only rewrite calls whose callee is F itself.
    if (!Call || Call->getCalledValue() != &F)
      continue;

    Value *Base = Call->getArgOperand(0);
    Value *Offset = Call->getArgOperand(1);

    IRBuilder<> Builder(Call);
    Value *SlotI8 = Builder.CreateGEP(I8Ty, Base, Offset);
    Value *SlotI32 = Builder.CreateBitCast(SlotI8, I32PtrTy);
    Value *Delta = Builder.CreateAlignedLoad(SlotI32, 4);
    Value *Target = Builder.CreateGEP(I8Ty, Base, Delta);

    Call->replaceAllUsesWith(Target);
    Call->eraseFromParent();
    Lowered = true;
  }

  return Lowered;
}
/// Run lowerLoadRelative on every function in \p M whose name starts with
/// "llvm.load.relative.".
///
/// \returns true if any of those calls reported a change.
bool lowerIntrinsics(Module &M) {
  bool AnyLowered = false;
  for (Function &Fn : M) {
    if (!Fn.getName().startswith("llvm.load.relative."))
      continue;
    if (lowerLoadRelative(Fn))
      AnyLowered = true;
  }
  return AnyLowered;
}
class PreISelIntrinsicLowering : public ModulePass {
public:
static char ID;
PreISelIntrinsicLowering() : ModulePass(ID) {}
bool runOnModule(Module &M) {
return lowerIntrinsics(M);
}
};
char PreISelIntrinsicLowering::ID;
}
// Declare the pass's registration data: "pre-isel-intrinsic-lowering" is the
// argument string (usable as `opt -pre-isel-intrinsic-lowering`) and
// "Pre-ISel Intrinsic Lowering" is its descriptive name.
INITIALIZE_PASS(PreISelIntrinsicLowering, "pre-isel-intrinsic-lowering",
"Pre-ISel Intrinsic Lowering", false, false)
/// Factory for the pre-ISel intrinsic lowering pass; returns a newly
/// allocated PreISelIntrinsicLowering instance.
ModulePass *llvm::createPreISelIntrinsicLoweringPass() {
  ModulePass *LoweringPass = new PreISelIntrinsicLowering();
  return LoweringPass;
}

View File

@ -6,6 +6,6 @@
; STOP-NEXT: Machine Function Analysis
; STOP-NEXT: MIR Printing Pass
; START: -machine-branch-prob -gc-lowering
; START: -machine-branch-prob -pre-isel-intrinsic-lowering
; START: FunctionPass Manager
; START-NEXT: Lower Garbage Collection Instructions

View File

@ -0,0 +1,19 @@
; RUN: opt < %s -instsimplify -S | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
target triple = "i386-unknown-linux-gnu"
@a = external global i8
@c1 = constant [3 x i32] [i32 0, i32 0,
i32 sub (i32 ptrtoint (i8* @a to i32), i32 ptrtoint (i32* getelementptr ([3 x i32], [3 x i32]* @c1, i32 0, i32 2) to i32))
]
; CHECK: @f1
define i8* @f1() {
; CHECK: ret i8* @a
%l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* getelementptr ([3 x i32], [3 x i32]* @c1, i32 0, i32 2) to i8*), i32 0)
ret i8* %l
}
declare i8* @llvm.load.relative.i32(i8*, i32)

View File

@ -0,0 +1,75 @@
; RUN: opt < %s -instsimplify -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@a = external global i8
@b = external global i8
@c1 = constant i32 trunc (i64 sub (i64 ptrtoint (i8* @a to i64), i64 ptrtoint (i32* @c1 to i64)) to i32)
@c2 = constant [7 x i32] [i32 0, i32 0,
i32 trunc (i64 sub (i64 ptrtoint (i8* @a to i64), i64 ptrtoint (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i32 0, i32 2) to i64)) to i32),
i32 trunc (i64 sub (i64 ptrtoint (i8* @b to i64), i64 ptrtoint (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i32 0, i32 2) to i64)) to i32),
i32 trunc (i64 add (i64 ptrtoint (i8* @b to i64), i64 ptrtoint (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i32 0, i32 2) to i64)) to i32),
i32 trunc (i64 sub (i64 ptrtoint (i8* @b to i64), i64 1) to i32),
i32 trunc (i64 sub (i64 0, i64 ptrtoint (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i32 0, i32 2) to i64)) to i32)
]
; CHECK: @f1
define i8* @f1() {
; CHECK: ret i8* @a
%l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* @c1 to i8*), i32 0)
ret i8* %l
}
; CHECK: @f2
define i8* @f2() {
; CHECK: ret i8* @a
%l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i64 0, i64 2) to i8*), i32 0)
ret i8* %l
}
; CHECK: @f3
define i8* @f3() {
; CHECK: ret i8* @b
%l = call i8* @llvm.load.relative.i64(i8* bitcast (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i64 0, i64 2) to i8*), i64 4)
ret i8* %l
}
; CHECK: @f4
define i8* @f4() {
; CHECK: ret i8* %
%l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i64 0, i64 2) to i8*), i32 1)
ret i8* %l
}
; CHECK: @f5
define i8* @f5() {
; CHECK: ret i8* %
%l = call i8* @llvm.load.relative.i32(i8* zeroinitializer, i32 0)
ret i8* %l
}
; CHECK: @f6
define i8* @f6() {
; CHECK: ret i8* %
%l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i64 0, i64 2) to i8*), i32 8)
ret i8* %l
}
; CHECK: @f7
define i8* @f7() {
; CHECK: ret i8* %
%l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i64 0, i64 2) to i8*), i32 12)
ret i8* %l
}
; CHECK: @f8
define i8* @f8() {
; CHECK: ret i8* %
%l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i64 0, i64 2) to i8*), i32 16)
ret i8* %l
}
declare i8* @llvm.load.relative.i32(i8*, i32)
declare i8* @llvm.load.relative.i64(i8*, i64)

View File

@ -0,0 +1,26 @@
; RUN: opt -pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s
; CHECK: define i8* @foo32(i8* [[P:%.*]], i32 [[O:%.*]])
define i8* @foo32(i8* %p, i32 %o) {
; CHECK: [[OP:%.*]] = getelementptr i8, i8* [[P]], i32 [[O]]
; CHECK: [[OPI32:%.*]] = bitcast i8* [[OP]] to i32*
; CHECK: [[OI32:%.*]] = load i32, i32* [[OPI32]], align 4
; CHECK: [[R:%.*]] = getelementptr i8, i8* [[P]], i32 [[OI32]]
; CHECK: ret i8* [[R]]
%l = call i8* @llvm.load.relative.i32(i8* %p, i32 %o)
ret i8* %l
}
; CHECK: define i8* @foo64(i8* [[P:%.*]], i64 [[O:%.*]])
define i8* @foo64(i8* %p, i64 %o) {
; CHECK: [[OP:%.*]] = getelementptr i8, i8* [[P]], i64 [[O]]
; CHECK: [[OPI32:%.*]] = bitcast i8* [[OP]] to i32*
; CHECK: [[OI32:%.*]] = load i32, i32* [[OPI32]], align 4
; CHECK: [[R:%.*]] = getelementptr i8, i8* [[P]], i32 [[OI32]]
; CHECK: ret i8* [[R]]
%l = call i8* @llvm.load.relative.i64(i8* %p, i64 %o)
ret i8* %l
}
declare i8* @llvm.load.relative.i32(i8*, i32)
declare i8* @llvm.load.relative.i64(i8*, i64)

View File

@ -353,6 +353,7 @@ int main(int argc, char **argv) {
initializeDwarfEHPreparePass(Registry);
initializeSafeStackPass(Registry);
initializeSjLjEHPreparePass(Registry);
initializePreISelIntrinsicLoweringPass(Registry);
#ifdef LINK_POLLY_INTO_TOOLS
polly::initializePollyPasses(Registry);