[NVPTX] Let there be One True Way to set NVVMReflect params.

Summary:
Previously there were three ways to inform the NVVMReflect pass whether
you wanted to flush denormals to zero:

  * An LLVM command-line option
  * Parameters to the NVVMReflect constructor
  * Metadata on the module itself.

This change removes the first two, leaving only the third.

The motivation for this change, aside from simplifying things, is that
we want LLVM to be aware of whether it's operating in FTZ mode, so other
passes can use this information.  Ideally we'd have a target-generic
piece of metadata on the module.  This change moves us in that
direction.

Reviewers: tra

Subscribers: jholewinski, llvm-commits

Differential Revision: https://reviews.llvm.org/D28700

llvm-svn: 292068
This commit is contained in:
Justin Lebar 2017-01-15 16:54:35 +00:00
parent 6aded2a0e4
commit 38746d9718
4 changed files with 66 additions and 111 deletions

View File

@ -289,7 +289,7 @@ code often follows a pattern:
return my_function_precise(a);
}
The default value for all unspecified reflection parameters is zero.
The default value for all unspecified reflection parameters is zero.
The ``NVVMReflect`` pass should be executed early in the optimization
pipeline, immediately after the link stage. The ``internalize`` pass is also
@ -326,6 +326,18 @@ often leave behind dead code of the form:
Therefore, it is recommended that ``NVVMReflect`` is executed early in the
optimization pipeline before dead-code elimination.
The NVPTX TargetMachine knows how to schedule ``NVVMReflect`` at the beginning
of your pass manager; just use the following code when setting up your pass
manager:
.. code-block:: c++
std::unique_ptr<TargetMachine> TM = ...;
PassManagerBuilder PMBuilder(...);
PMBuilder.addExtension(
PassManagerBuilder::EP_EarlyAsPossible,
[&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
TM->addEarlyAsPossiblePasses(PM);
});
Reflection Parameters
---------------------
@ -339,35 +351,16 @@ Flag Description
``__CUDA_FTZ=[0,1]`` Use optimized code paths that flush subnormals to zero
==================== ======================================================
The value of this flag is determined by the "nvvm-reflect-ftz" module flag.
The following sets the ftz flag to 1.
Invoking NVVMReflect
--------------------
To ensure that all dead code caused by the reflection pass is eliminated, it
is recommended that the reflection pass is executed early in the LLVM IR
optimization pipeline. The pass takes an optional mapping of reflection
parameter name to an integer value. This mapping can be specified as either a
command-line option to ``opt`` or as an LLVM ``StringMap<int>`` object when
programmatically creating a pass pipeline.
With ``opt``:
.. code-block:: text
# opt -nvvm-reflect -nvvm-reflect-list=<var>=<value>,<var>=<value> module.bc -o module.reflect.bc
With programmatic pass pipeline:
.. code-block:: c++
extern FunctionPass *llvm::createNVVMReflectPass(const StringMap<int>& Mapping);
StringMap<int> ReflectParams;
ReflectParams["__CUDA_FTZ"] = 1;
Passes.add(createNVVMReflectPass(ReflectParams));
.. code-block:: llvm
!llvm.module.flags = !{!0}
!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}
(``i32 4`` indicates that the value set here overrides the value in another
module we link with. See the `LangRef <LangRef.html#module-flags-metadata>`_
for details.)
Executing PTX
=============

View File

@ -48,7 +48,6 @@ ModulePass *createGenericToNVVMPass();
FunctionPass *createNVPTXInferAddressSpacesPass();
FunctionPass *createNVVMIntrRangePass(unsigned int SmVersion);
FunctionPass *createNVVMReflectPass();
FunctionPass *createNVVMReflectPass(const StringMap<int> &Mapping);
MachineFunctionPass *createNVPTXPrologEpilogPass();
MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
FunctionPass *createNVPTXImageOptimizerPass();

View File

@ -10,11 +10,10 @@
// This pass replaces occurrences of __nvvm_reflect("foo") and llvm.nvvm.reflect
// with an integer.
//
// We choose the value we use by looking, in this order, at:
//
// * the -nvvm-reflect-list flag, which has the format "foo=1,bar=42",
// * the StringMap passed to the pass's constructor, and
// * metadata in the module itself.
// We choose the value we use by looking at metadata in the module itself. Note
// that we intentionally only have one way to choose these values, because other
// parts of LLVM (particularly, InstCombineCall) rely on being able to predict
// the values chosen by this pass.
//
// If we see an unknown string, we replace its call with 0.
//
@ -49,30 +48,17 @@ namespace llvm { void initializeNVVMReflectPass(PassRegistry &); }
namespace {
class NVVMReflect : public FunctionPass {
private:
StringMap<int> VarMap;
public:
static char ID;
NVVMReflect() : NVVMReflect(StringMap<int>()) {}
NVVMReflect(const StringMap<int> &Mapping)
: FunctionPass(ID), VarMap(Mapping) {
NVVMReflect() : FunctionPass(ID) {
initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
setVarMap();
}
bool runOnFunction(Function &) override;
private:
void setVarMap();
};
}
FunctionPass *llvm::createNVVMReflectPass() { return new NVVMReflect(); }
FunctionPass *llvm::createNVVMReflectPass(const StringMap<int> &Mapping) {
return new NVVMReflect(Mapping);
}
static cl::opt<bool>
NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true), cl::Hidden,
@ -83,35 +69,6 @@ INITIALIZE_PASS(NVVMReflect, "nvvm-reflect",
"Replace occurrences of __nvvm_reflect() calls with 0/1", false,
false)
static cl::list<std::string>
ReflectList("nvvm-reflect-list", cl::value_desc("name=<int>"), cl::Hidden,
cl::desc("A list of string=num assignments"),
cl::ValueRequired);
/// The command line can look as follows :
/// -nvvm-reflect-list a=1,b=2 -nvvm-reflect-list c=3,d=0 -R e=2
/// The strings "a=1,b=2", "c=3,d=0", "e=2" are available in the
/// ReflectList vector. First, each ReflectList[i] is split
/// using "," as the delimiter. Then each of the resulting parts is split
/// using "=" as the delimiter.
void NVVMReflect::setVarMap() {
for (unsigned i = 0, e = ReflectList.size(); i != e; ++i) {
DEBUG(dbgs() << "Option : " << ReflectList[i] << "\n");
SmallVector<StringRef, 4> NameValList;
StringRef(ReflectList[i]).split(NameValList, ',');
for (unsigned j = 0, ej = NameValList.size(); j != ej; ++j) {
SmallVector<StringRef, 2> NameValPair;
NameValList[j].split(NameValPair, '=');
assert(NameValPair.size() == 2 && "name=val expected");
std::stringstream ValStream(NameValPair[1]);
int Val;
ValStream >> Val;
assert((!(ValStream.fail())) && "integer value expected");
VarMap[NameValPair[0]] = Val;
}
}
}
bool NVVMReflect::runOnFunction(Function &F) {
if (!NVVMReflectEnabled)
return false;
@ -199,11 +156,10 @@ bool NVVMReflect::runOnFunction(Function &F) {
DEBUG(dbgs() << "Arg of _reflect : " << ReflectArg << "\n");
int ReflectVal = 0; // The default value is 0
auto Iter = VarMap.find(ReflectArg);
if (Iter != VarMap.end())
ReflectVal = Iter->second;
else if (ReflectArg == "__CUDA_FTZ") {
// Try to pull __CUDA_FTZ from the nvvm-reflect-ftz module flag.
if (ReflectArg == "__CUDA_FTZ") {
// Try to pull __CUDA_FTZ from the nvvm-reflect-ftz module flag. Our
// choice here must be kept in sync with AutoUpgrade, which uses the same
// technique to detect whether ftz is enabled.
if (auto *Flag = mdconst::extract_or_null<ConstantInt>(
F.getParent()->getModuleFlag("nvvm-reflect-ftz")))
ReflectVal = Flag->getSExtValue();

View File

@ -1,30 +1,38 @@
; RUN: opt < %s -S -nvvm-reflect -nvvm-reflect-list USE_MUL=0 -O2 | FileCheck %s --check-prefix=USE_MUL_0
; RUN: opt < %s -S -nvvm-reflect -nvvm-reflect-list USE_MUL=1 -O2 | FileCheck %s --check-prefix=USE_MUL_1
; We run nvvm-reflect (and then optimize) this module twice, once with metadata
; that enables FTZ, and again with metadata that disables it.
@str = private unnamed_addr addrspace(4) constant [8 x i8] c"USE_MUL\00"
; RUN: cat %s > %t.noftz
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 0}' >> %t.noftz
; RUN: opt %t.noftz -S -nvvm-reflect -O2 \
; RUN: | FileCheck %s --check-prefix=USE_FTZ_0 --check-prefix=CHECK
; RUN: cat %s > %t.ftz
; RUN: echo '!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}' >> %t.ftz
; RUN: opt %t.ftz -S -nvvm-reflect -O2 \
; RUN: | FileCheck %s --check-prefix=USE_FTZ_1 --check-prefix=CHECK
@str = private unnamed_addr addrspace(4) constant [11 x i8] c"__CUDA_FTZ\00"
declare i32 @__nvvm_reflect(i8*)
declare i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)*)
; CHECK-LABEL: @foo
define float @foo(float %a, float %b) {
; USE_MUL_0: define float @foo
; USE_MUL_0-NOT: call i32 @__nvvm_reflect
; USE_MUL_1: define float @foo
; USE_MUL_1-NOT: call i32 @__nvvm_reflect
%ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @str, i32 0, i32 0))
; CHECK-NOT: call i32 @__nvvm_reflect
%ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([11 x i8], [11 x i8] addrspace(4)* @str, i32 0, i32 0))
%reflect = tail call i32 @__nvvm_reflect(i8* %ptr)
%cmp = icmp ugt i32 %reflect, 0
br i1 %cmp, label %use_mul, label %use_add
use_mul:
; USE_MUL_1: fmul float %a, %b
; USE_MUL_0-NOT: fadd float %a, %b
; USE_FTZ_1: fmul float %a, %b
; USE_FTZ_0-NOT: fadd float %a, %b
%ret1 = fmul float %a, %b
br label %exit
use_add:
; USE_MUL_0: fadd float %a, %b
; USE_MUL_1-NOT: fmul float %a, %b
; USE_FTZ_0: fadd float %a, %b
; USE_FTZ_1-NOT: fmul float %a, %b
%ret2 = fadd float %a, %b
br label %exit
@ -35,14 +43,12 @@ exit:
declare i32 @llvm.nvvm.reflect.p0i8(i8*)
; USE_MUL_0: define i32 @intrinsic
; USE_MUL_1: define i32 @intrinsic
; CHECK-LABEL: define i32 @intrinsic
define i32 @intrinsic() {
; USE_MUL_0-NOT: call i32 @llvm.nvvm.reflect
; USE_MUL_0: ret i32 0
; USE_MUL_1-NOT: call i32 @llvm.nvvm.reflect
; USE_MUL_1: ret i32 1
%ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @str, i32 0, i32 0))
; CHECK-NOT: call i32 @llvm.nvvm.reflect
; USE_FTZ_0: ret i32 0
; USE_FTZ_1: ret i32 1
%ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([11 x i8], [11 x i8] addrspace(4)* @str, i32 0, i32 0))
%reflect = tail call i32 @llvm.nvvm.reflect.p0i8(i8* %ptr)
ret i32 %reflect
}
@ -50,26 +56,24 @@ define i32 @intrinsic() {
; CUDA-7.0 passes __nvvm_reflect argument slightly differently.
; Verify that it works, too
@"$str" = private addrspace(1) constant [8 x i8] c"USE_MUL\00"
@"$str" = private addrspace(1) constant [11 x i8] c"__CUDA_FTZ\00"
; CHECK-LABEL: @bar
define float @bar(float %a, float %b) {
; USE_MUL_0: define float @bar
; USE_MUL_0-NOT: call i32 @__nvvm_reflect
; USE_MUL_1: define float @bar
; USE_MUL_1-NOT: call i32 @__nvvm_reflect
%reflect = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(1)* @"$str", i32 0, i32 0) to i8*))
; CHECK-NOT: call i32 @__nvvm_reflect
%reflect = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([11 x i8], [11 x i8] addrspace(1)* @"$str", i32 0, i32 0) to i8*))
%cmp = icmp ne i32 %reflect, 0
br i1 %cmp, label %use_mul, label %use_add
use_mul:
; USE_MUL_1: fmul float %a, %b
; USE_MUL_0-NOT: fadd float %a, %b
; USE_FTZ_1: fmul float %a, %b
; USE_FTZ_0-NOT: fadd float %a, %b
%ret1 = fmul float %a, %b
br label %exit
use_add:
; USE_MUL_0: fadd float %a, %b
; USE_MUL_1-NOT: fmul float %a, %b
; USE_FTZ_0: fadd float %a, %b
; USE_FTZ_1-NOT: fmul float %a, %b
%ret2 = fadd float %a, %b
br label %exit
@ -77,3 +81,6 @@ exit:
%ret = phi float [%ret1, %use_mul], [%ret2, %use_add]
ret float %ret
}
!llvm.module.flags = !{!0}
; A module flag is added to the end of this file by the RUN lines at the top.