[NVPTX] Disable performance optimizations when OptLevel==None

Reviewers: jholewinski, tra, eliben

Subscribers: jholewinski, llvm-commits

Differential Revision: http://reviews.llvm.org/D16874

llvm-svn: 259749
This commit is contained in:
Jingyue Wu 2016-02-04 04:15:36 +00:00
parent d5c0e4d69b
commit f650441b04
2 changed files with 48 additions and 21 deletions

View File

@ -143,14 +143,20 @@ public:
void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
private:
// if the opt level is aggressive, add GVN; otherwise, add EarlyCSE.
// If the opt level is aggressive, add GVN; otherwise, add EarlyCSE. This
// function is only called in opt mode.
void addEarlyCSEOrGVNPass();
// Add passes that propagate special memory spaces.
void addMemorySpaceInferencePasses();
// Add passes that perform straight-line scalar optimizations.
void addStraightLineScalarOptimizationPasses();
};
} // end anonymous namespace
TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM);
return PassConfig;
return new NVPTXPassConfig(this, PM);
}
TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() {
@ -166,22 +172,7 @@ void NVPTXPassConfig::addEarlyCSEOrGVNPass() {
addPass(createEarlyCSEPass());
}
void NVPTXPassConfig::addIRPasses() {
// The following passes are known to not play well with virtual regs hanging
// around after register allocation (which in our case, is *all* registers).
// We explicitly disable them here. We do, however, need some functionality
// of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
// NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
disablePass(&PrologEpilogCodeInserterID);
disablePass(&MachineCopyPropagationID);
disablePass(&TailDuplicateID);
addPass(createNVVMReflectPass());
addPass(createNVPTXImageOptimizerPass());
addPass(createNVPTXAssignValidGlobalNamesPass());
addPass(createGenericToNVVMPass());
// === Propagate special address spaces ===
void NVPTXPassConfig::addMemorySpaceInferencePasses() {
addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine()));
// NVPTXLowerKernelArgs emits alloca for byval parameters which can often
// be eliminated by SROA.
@ -192,8 +183,9 @@ void NVPTXPassConfig::addIRPasses() {
// them unused. We could remove dead code in an ad-hoc manner, but that
// requires manual work and might be error-prone.
addPass(createDeadCodeEliminationPass());
}
// === Straight-line scalar optimizations ===
void NVPTXPassConfig::addStraightLineScalarOptimizationPasses() {
addPass(createSeparateConstOffsetFromGEPPass());
addPass(createSpeculativeExecutionPass());
// ReassociateGEPs exposes more opportunites for SLSR. See
@ -208,6 +200,28 @@ void NVPTXPassConfig::addIRPasses() {
// NaryReassociate on GEPs creates redundant common expressions, so run
// EarlyCSE after it.
addPass(createEarlyCSEPass());
}
void NVPTXPassConfig::addIRPasses() {
// The following passes are known to not play well with virtual regs hanging
// around after register allocation (which in our case, is *all* registers).
// We explicitly disable them here. We do, however, need some functionality
// of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
// NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
disablePass(&PrologEpilogCodeInserterID);
disablePass(&MachineCopyPropagationID);
disablePass(&TailDuplicateID);
addPass(createNVVMReflectPass());
if (getOptLevel() != CodeGenOpt::None)
addPass(createNVPTXImageOptimizerPass());
addPass(createNVPTXAssignValidGlobalNamesPass());
addPass(createGenericToNVVMPass());
if (getOptLevel() != CodeGenOpt::None) {
addMemorySpaceInferencePasses();
addStraightLineScalarOptimizationPasses();
}
// === LSR and other generic IR passes ===
TargetPassConfig::addIRPasses();
@ -223,7 +237,8 @@ void NVPTXPassConfig::addIRPasses() {
// %1 = shl %a, 2
//
// but EarlyCSE can do neither of them.
addEarlyCSEOrGVNPass();
if (getOptLevel() != CodeGenOpt::None)
addEarlyCSEOrGVNPass();
}
bool NVPTXPassConfig::addInstSelector() {

View File

@ -0,0 +1,12 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -O0 | FileCheck %s
define void @foo(i32* %output) {
; CHECK-LABEL: .visible .func foo(
entry:
%local = alloca i32
; CHECK: __local_depot
store i32 1, i32* %local
%0 = load i32, i32* %local
store i32 %0, i32* %output
ret void
}