[Clang][OpenMP] Add partial support for Static Device Libraries
An archive containing device code object files can be passed to clang command line for linking. For each given offload target it creates a device specific archives which is either passed to llvm-link if the target is amdgpu, or to clang-nvlink-wrapper if the target is nvptx. -L/-l flags are used to specify these fat archives on the command line. E.g. clang++ -fopenmp -fopenmp-targets=nvptx64 main.cpp -L. -lmylib It currently doesn't support linking an archive directly, like: clang++ -fopenmp -fopenmp-targets=nvptx64 main.cpp libmylib.a Linking with x86 offload also does not work. Reviewed By: ye-luo Differential Revision: https://reviews.llvm.org/D105191
This commit is contained in:
parent
393581d8a5
commit
4c41170895
|
@ -114,6 +114,10 @@ const char *AMDGCN::OpenMPLinker::constructLLVMLinkCommand(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "amdgcn",
|
||||||
|
SubArchName,
|
||||||
|
/* bitcode SDL?*/ true,
|
||||||
|
/* PostClang Link? */ false);
|
||||||
// Add an intermediate output file.
|
// Add an intermediate output file.
|
||||||
CmdArgs.push_back("-o");
|
CmdArgs.push_back("-o");
|
||||||
const char *OutputFileName =
|
const char *OutputFileName =
|
||||||
|
|
|
@ -7734,12 +7734,28 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
|
||||||
Triples += Action::GetOffloadKindName(CurKind);
|
Triples += Action::GetOffloadKindName(CurKind);
|
||||||
Triples += '-';
|
Triples += '-';
|
||||||
Triples += CurTC->getTriple().normalize();
|
Triples += CurTC->getTriple().normalize();
|
||||||
if ((CurKind == Action::OFK_HIP || CurKind == Action::OFK_OpenMP ||
|
if ((CurKind == Action::OFK_HIP || CurKind == Action::OFK_Cuda) &&
|
||||||
CurKind == Action::OFK_Cuda) &&
|
|
||||||
CurDep->getOffloadingArch()) {
|
CurDep->getOffloadingArch()) {
|
||||||
Triples += '-';
|
Triples += '-';
|
||||||
Triples += CurDep->getOffloadingArch();
|
Triples += CurDep->getOffloadingArch();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: Replace parsing of -march flag. Can be done by storing GPUArch
|
||||||
|
// with each toolchain.
|
||||||
|
StringRef GPUArchName;
|
||||||
|
if (CurKind == Action::OFK_OpenMP) {
|
||||||
|
// Extract GPUArch from -march argument in TC argument list.
|
||||||
|
for (unsigned ArgIndex = 0; ArgIndex < TCArgs.size(); ArgIndex++) {
|
||||||
|
auto ArchStr = StringRef(TCArgs.getArgString(ArgIndex));
|
||||||
|
auto Arch = ArchStr.startswith_insensitive("-march=");
|
||||||
|
if (Arch) {
|
||||||
|
GPUArchName = ArchStr.substr(7);
|
||||||
|
Triples += "-";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Triples += GPUArchName.str();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
CmdArgs.push_back(TCArgs.MakeArgString(Triples));
|
CmdArgs.push_back(TCArgs.MakeArgString(Triples));
|
||||||
|
|
||||||
|
@ -7813,12 +7829,27 @@ void OffloadBundler::ConstructJobMultipleOutputs(
|
||||||
Triples += '-';
|
Triples += '-';
|
||||||
Triples += Dep.DependentToolChain->getTriple().normalize();
|
Triples += Dep.DependentToolChain->getTriple().normalize();
|
||||||
if ((Dep.DependentOffloadKind == Action::OFK_HIP ||
|
if ((Dep.DependentOffloadKind == Action::OFK_HIP ||
|
||||||
Dep.DependentOffloadKind == Action::OFK_OpenMP ||
|
|
||||||
Dep.DependentOffloadKind == Action::OFK_Cuda) &&
|
Dep.DependentOffloadKind == Action::OFK_Cuda) &&
|
||||||
!Dep.DependentBoundArch.empty()) {
|
!Dep.DependentBoundArch.empty()) {
|
||||||
Triples += '-';
|
Triples += '-';
|
||||||
Triples += Dep.DependentBoundArch;
|
Triples += Dep.DependentBoundArch;
|
||||||
}
|
}
|
||||||
|
// TODO: Replace parsing of -march flag. Can be done by storing GPUArch
|
||||||
|
// with each toolchain.
|
||||||
|
StringRef GPUArchName;
|
||||||
|
if (Dep.DependentOffloadKind == Action::OFK_OpenMP) {
|
||||||
|
// Extract GPUArch from -march argument in TC argument list.
|
||||||
|
for (uint ArgIndex = 0; ArgIndex < TCArgs.size(); ArgIndex++) {
|
||||||
|
StringRef ArchStr = StringRef(TCArgs.getArgString(ArgIndex));
|
||||||
|
auto Arch = ArchStr.startswith_insensitive("-march=");
|
||||||
|
if (Arch) {
|
||||||
|
GPUArchName = ArchStr.substr(7);
|
||||||
|
Triples += "-";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Triples += GPUArchName.str();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
CmdArgs.push_back(TCArgs.MakeArgString(Triples));
|
CmdArgs.push_back(TCArgs.MakeArgString(Triples));
|
||||||
|
|
|
@ -34,6 +34,7 @@
|
||||||
#include "clang/Driver/Util.h"
|
#include "clang/Driver/Util.h"
|
||||||
#include "clang/Driver/XRayArgs.h"
|
#include "clang/Driver/XRayArgs.h"
|
||||||
#include "llvm/ADT/STLExtras.h"
|
#include "llvm/ADT/STLExtras.h"
|
||||||
|
#include "llvm/ADT/SmallSet.h"
|
||||||
#include "llvm/ADT/SmallString.h"
|
#include "llvm/ADT/SmallString.h"
|
||||||
#include "llvm/ADT/StringExtras.h"
|
#include "llvm/ADT/StringExtras.h"
|
||||||
#include "llvm/ADT/StringSwitch.h"
|
#include "llvm/ADT/StringSwitch.h"
|
||||||
|
@ -1587,6 +1588,292 @@ void tools::addX86AlignBranchArgs(const Driver &D, const ArgList &Args,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// SDLSearch: Search for Static Device Library
|
||||||
|
/// The search for SDL bitcode files is consistent with how static host
|
||||||
|
/// libraries are discovered. That is, the -l option triggers a search for
|
||||||
|
/// files in a set of directories called the LINKPATH. The host library search
|
||||||
|
/// procedure looks for a specific filename in the LINKPATH. The filename for
|
||||||
|
/// a host library is lib<libname>.a or lib<libname>.so. For SDLs, there is an
|
||||||
|
/// ordered-set of filenames that are searched. We call this ordered-set of
|
||||||
|
/// filenames as SEARCH-ORDER. Since an SDL can either be device-type specific,
|
||||||
|
/// architecture specific, or generic across all architectures, a naming
|
||||||
|
/// convention and search order is used where the file name embeds the
|
||||||
|
/// architecture name <arch-name> (nvptx or amdgcn) and the GPU device type
|
||||||
|
/// <device-name> such as sm_30 and gfx906. <device-name> is absent in case of
|
||||||
|
/// device-independent SDLs. To reduce congestion in host library directories,
|
||||||
|
/// the search first looks for files in the “libdevice” subdirectory. SDLs that
|
||||||
|
/// are bc files begin with the prefix “lib”.
|
||||||
|
///
|
||||||
|
/// Machine-code SDLs can also be managed as an archive (*.a file). The
|
||||||
|
/// convention has been to use the prefix “lib”. To avoid confusion with host
|
||||||
|
/// archive libraries, we use prefix "libbc-" for the bitcode SDL archives.
|
||||||
|
///
|
||||||
|
bool tools::SDLSearch(const Driver &D, const llvm::opt::ArgList &DriverArgs,
|
||||||
|
llvm::opt::ArgStringList &CC1Args,
|
||||||
|
SmallVector<std::string, 8> LibraryPaths, std::string Lib,
|
||||||
|
StringRef Arch, StringRef Target, bool isBitCodeSDL,
|
||||||
|
bool postClangLink) {
|
||||||
|
SmallVector<std::string, 12> SDLs;
|
||||||
|
|
||||||
|
std::string LibDeviceLoc = "/libdevice";
|
||||||
|
std::string LibBcPrefix = "/libbc-";
|
||||||
|
std::string LibPrefix = "/lib";
|
||||||
|
|
||||||
|
if (isBitCodeSDL) {
|
||||||
|
// SEARCH-ORDER for Bitcode SDLs:
|
||||||
|
// libdevice/libbc-<libname>-<arch-name>-<device-type>.a
|
||||||
|
// libbc-<libname>-<arch-name>-<device-type>.a
|
||||||
|
// libdevice/libbc-<libname>-<arch-name>.a
|
||||||
|
// libbc-<libname>-<arch-name>.a
|
||||||
|
// libdevice/libbc-<libname>.a
|
||||||
|
// libbc-<libname>.a
|
||||||
|
// libdevice/lib<libname>-<arch-name>-<device-type>.bc
|
||||||
|
// lib<libname>-<arch-name>-<device-type>.bc
|
||||||
|
// libdevice/lib<libname>-<arch-name>.bc
|
||||||
|
// lib<libname>-<arch-name>.bc
|
||||||
|
// libdevice/lib<libname>.bc
|
||||||
|
// lib<libname>.bc
|
||||||
|
|
||||||
|
for (StringRef Base : {LibBcPrefix, LibPrefix}) {
|
||||||
|
const auto *Ext = Base.contains(LibBcPrefix) ? ".a" : ".bc";
|
||||||
|
|
||||||
|
for (auto Suffix : {Twine(Lib + "-" + Arch + "-" + Target).str(),
|
||||||
|
Twine(Lib + "-" + Arch).str(), Twine(Lib).str()}) {
|
||||||
|
SDLs.push_back(Twine(LibDeviceLoc + Base + Suffix + Ext).str());
|
||||||
|
SDLs.push_back(Twine(Base + Suffix + Ext).str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// SEARCH-ORDER for Machine-code SDLs:
|
||||||
|
// libdevice/lib<libname>-<arch-name>-<device-type>.a
|
||||||
|
// lib<libname>-<arch-name>-<device-type>.a
|
||||||
|
// libdevice/lib<libname>-<arch-name>.a
|
||||||
|
// lib<libname>-<arch-name>.a
|
||||||
|
|
||||||
|
const auto *Ext = ".a";
|
||||||
|
|
||||||
|
for (auto Suffix : {Twine(Lib + "-" + Arch + "-" + Target).str(),
|
||||||
|
Twine(Lib + "-" + Arch).str()}) {
|
||||||
|
SDLs.push_back(Twine(LibDeviceLoc + LibPrefix + Suffix + Ext).str());
|
||||||
|
SDLs.push_back(Twine(LibPrefix + Suffix + Ext).str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// The CUDA toolchain does not use a global device llvm-link before the LLVM
|
||||||
|
// backend generates ptx. So currently, the use of bitcode SDL for nvptx is
|
||||||
|
// only possible with post-clang-cc1 linking. Clang cc1 has a feature that
|
||||||
|
// will link libraries after clang compilation while the LLVM IR is still in
|
||||||
|
// memory. This utilizes a clang cc1 option called “-mlink-builtin-bitcode”.
|
||||||
|
// This is a clang -cc1 option that is generated by the clang driver. The
|
||||||
|
// option value must a full path to an existing file.
|
||||||
|
bool FoundSDL = false;
|
||||||
|
for (auto LPath : LibraryPaths) {
|
||||||
|
for (auto SDL : SDLs) {
|
||||||
|
auto FullName = Twine(LPath + SDL).str();
|
||||||
|
if (llvm::sys::fs::exists(FullName)) {
|
||||||
|
if (postClangLink)
|
||||||
|
CC1Args.push_back("-mlink-builtin-bitcode");
|
||||||
|
CC1Args.push_back(DriverArgs.MakeArgString(FullName));
|
||||||
|
FoundSDL = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (FoundSDL)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return FoundSDL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Search if a user provided archive file lib<libname>.a exists in any of
|
||||||
|
/// the library paths. If so, add a new command to clang-offload-bundler to
|
||||||
|
/// unbundle this archive and create a temporary device specific archive. Name
|
||||||
|
/// of this SDL is passed to the llvm-link (for amdgcn) or to the
|
||||||
|
/// clang-nvlink-wrapper (for nvptx) commands by the driver.
|
||||||
|
bool tools::GetSDLFromOffloadArchive(
|
||||||
|
Compilation &C, const Driver &D, const Tool &T, const JobAction &JA,
|
||||||
|
const InputInfoList &Inputs, const llvm::opt::ArgList &DriverArgs,
|
||||||
|
llvm::opt::ArgStringList &CC1Args, SmallVector<std::string, 8> LibraryPaths,
|
||||||
|
StringRef Lib, StringRef Arch, StringRef Target, bool isBitCodeSDL,
|
||||||
|
bool postClangLink) {
|
||||||
|
|
||||||
|
// We don't support bitcode archive bundles for nvptx
|
||||||
|
if (isBitCodeSDL && Arch.contains("nvptx"))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
bool FoundAOB = false;
|
||||||
|
SmallVector<std::string, 2> AOBFileNames;
|
||||||
|
std::string ArchiveOfBundles;
|
||||||
|
for (auto LPath : LibraryPaths) {
|
||||||
|
ArchiveOfBundles.clear();
|
||||||
|
|
||||||
|
AOBFileNames.push_back(Twine(LPath + "/libdevice/lib" + Lib + ".a").str());
|
||||||
|
AOBFileNames.push_back(Twine(LPath + "/lib" + Lib + ".a").str());
|
||||||
|
|
||||||
|
for (auto AOB : AOBFileNames) {
|
||||||
|
if (llvm::sys::fs::exists(AOB)) {
|
||||||
|
ArchiveOfBundles = AOB;
|
||||||
|
FoundAOB = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!FoundAOB)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
StringRef Prefix = isBitCodeSDL ? "libbc-" : "lib";
|
||||||
|
std::string OutputLib = D.GetTemporaryPath(
|
||||||
|
Twine(Prefix + Lib + "-" + Arch + "-" + Target).str(), "a");
|
||||||
|
|
||||||
|
C.addTempFile(C.getArgs().MakeArgString(OutputLib.c_str()));
|
||||||
|
|
||||||
|
ArgStringList CmdArgs;
|
||||||
|
SmallString<128> DeviceTriple;
|
||||||
|
DeviceTriple += Action::GetOffloadKindName(JA.getOffloadingDeviceKind());
|
||||||
|
DeviceTriple += '-';
|
||||||
|
std::string NormalizedTriple = T.getToolChain().getTriple().normalize();
|
||||||
|
DeviceTriple += NormalizedTriple;
|
||||||
|
if (!Target.empty()) {
|
||||||
|
DeviceTriple += '-';
|
||||||
|
DeviceTriple += Target;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string UnbundleArg("-unbundle");
|
||||||
|
std::string TypeArg("-type=a");
|
||||||
|
std::string InputArg("-inputs=" + ArchiveOfBundles);
|
||||||
|
std::string OffloadArg("-targets=" + std::string(DeviceTriple));
|
||||||
|
std::string OutputArg("-outputs=" + OutputLib);
|
||||||
|
|
||||||
|
const char *UBProgram = DriverArgs.MakeArgString(
|
||||||
|
T.getToolChain().GetProgramPath("clang-offload-bundler"));
|
||||||
|
|
||||||
|
ArgStringList UBArgs;
|
||||||
|
UBArgs.push_back(C.getArgs().MakeArgString(UnbundleArg.c_str()));
|
||||||
|
UBArgs.push_back(C.getArgs().MakeArgString(TypeArg.c_str()));
|
||||||
|
UBArgs.push_back(C.getArgs().MakeArgString(InputArg.c_str()));
|
||||||
|
UBArgs.push_back(C.getArgs().MakeArgString(OffloadArg.c_str()));
|
||||||
|
UBArgs.push_back(C.getArgs().MakeArgString(OutputArg.c_str()));
|
||||||
|
|
||||||
|
// Add this flag to not exit from clang-offload-bundler if no compatible
|
||||||
|
// code object is found in heterogenous archive library.
|
||||||
|
std::string AdditionalArgs("-allow-missing-bundles");
|
||||||
|
UBArgs.push_back(C.getArgs().MakeArgString(AdditionalArgs.c_str()));
|
||||||
|
|
||||||
|
C.addCommand(std::make_unique<Command>(
|
||||||
|
JA, T, ResponseFileSupport::AtFileCurCP(), UBProgram, UBArgs, Inputs,
|
||||||
|
InputInfo(&JA, C.getArgs().MakeArgString(OutputLib.c_str()))));
|
||||||
|
if (postClangLink)
|
||||||
|
CC1Args.push_back("-mlink-builtin-bitcode");
|
||||||
|
|
||||||
|
CC1Args.push_back(DriverArgs.MakeArgString(OutputLib));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return FoundAOB;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wrapper function used by driver for adding SDLs during link phase.
|
||||||
|
void tools::AddStaticDeviceLibsLinking(Compilation &C, const Tool &T,
|
||||||
|
const JobAction &JA,
|
||||||
|
const InputInfoList &Inputs,
|
||||||
|
const llvm::opt::ArgList &DriverArgs,
|
||||||
|
llvm::opt::ArgStringList &CC1Args,
|
||||||
|
StringRef Arch, StringRef Target,
|
||||||
|
bool isBitCodeSDL, bool postClangLink) {
|
||||||
|
AddStaticDeviceLibs(&C, &T, &JA, &Inputs, C.getDriver(), DriverArgs, CC1Args,
|
||||||
|
Arch, Target, isBitCodeSDL, postClangLink);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wrapper function used for post clang linking of bitcode SDLS for nvptx by
|
||||||
|
// the CUDA toolchain.
|
||||||
|
void tools::AddStaticDeviceLibsPostLinking(const Driver &D,
|
||||||
|
const llvm::opt::ArgList &DriverArgs,
|
||||||
|
llvm::opt::ArgStringList &CC1Args,
|
||||||
|
StringRef Arch, StringRef Target,
|
||||||
|
bool isBitCodeSDL, bool postClangLink) {
|
||||||
|
AddStaticDeviceLibs(nullptr, nullptr, nullptr, nullptr, D, DriverArgs,
|
||||||
|
CC1Args, Arch, Target, isBitCodeSDL, postClangLink);
|
||||||
|
}
|
||||||
|
|
||||||
|
// User defined Static Device Libraries(SDLs) can be passed to clang for
|
||||||
|
// offloading GPU compilers. Like static host libraries, the use of a SDL is
|
||||||
|
// specified with the -l command line option. The primary difference between
|
||||||
|
// host and SDLs is the filenames for SDLs (refer SEARCH-ORDER for Bitcode SDLs
|
||||||
|
// and SEARCH-ORDER for Machine-code SDLs for the naming convention).
|
||||||
|
// SDLs are of following types:
|
||||||
|
//
|
||||||
|
// * Bitcode SDLs: They can either be a *.bc file or an archive of *.bc files.
|
||||||
|
// For NVPTX, these libraries are post-clang linked following each
|
||||||
|
// compilation. For AMDGPU, these libraries are linked one time
|
||||||
|
// during the application link phase.
|
||||||
|
//
|
||||||
|
// * Machine-code SDLs: They are archive files. For NVPTX, the archive members
|
||||||
|
// contain cubin for Nvidia GPUs and are linked one time during the
|
||||||
|
// link phase by the CUDA SDK linker called nvlink. For AMDGPU, the
|
||||||
|
// process for machine code SDLs is still in development. But they
|
||||||
|
// will be linked by the LLVM tool lld.
|
||||||
|
//
|
||||||
|
// * Bundled objects that contain both host and device codes: Bundled objects
|
||||||
|
// may also contain library code compiled from source. For NVPTX, the
|
||||||
|
// bundle contains cubin. For AMDGPU, the bundle contains bitcode.
|
||||||
|
//
|
||||||
|
// For Bitcode and Machine-code SDLs, current compiler toolchains hardcode the
|
||||||
|
// inclusion of specific SDLs such as math libraries and the OpenMP device
|
||||||
|
// library libomptarget.
|
||||||
|
void tools::AddStaticDeviceLibs(Compilation *C, const Tool *T,
|
||||||
|
const JobAction *JA,
|
||||||
|
const InputInfoList *Inputs, const Driver &D,
|
||||||
|
const llvm::opt::ArgList &DriverArgs,
|
||||||
|
llvm::opt::ArgStringList &CC1Args,
|
||||||
|
StringRef Arch, StringRef Target,
|
||||||
|
bool isBitCodeSDL, bool postClangLink) {
|
||||||
|
|
||||||
|
SmallVector<std::string, 8> LibraryPaths;
|
||||||
|
// Add search directories from LIBRARY_PATH env variable
|
||||||
|
llvm::Optional<std::string> LibPath =
|
||||||
|
llvm::sys::Process::GetEnv("LIBRARY_PATH");
|
||||||
|
if (LibPath) {
|
||||||
|
SmallVector<StringRef, 8> Frags;
|
||||||
|
const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
|
||||||
|
llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
|
||||||
|
for (StringRef Path : Frags)
|
||||||
|
LibraryPaths.emplace_back(Path.trim());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add directories from user-specified -L options
|
||||||
|
for (std::string Search_Dir : DriverArgs.getAllArgValues(options::OPT_L))
|
||||||
|
LibraryPaths.emplace_back(Search_Dir);
|
||||||
|
|
||||||
|
// Add path to lib-debug folders
|
||||||
|
SmallString<256> DefaultLibPath = llvm::sys::path::parent_path(D.Dir);
|
||||||
|
llvm::sys::path::append(DefaultLibPath, Twine("lib") + CLANG_LIBDIR_SUFFIX);
|
||||||
|
LibraryPaths.emplace_back(DefaultLibPath.c_str());
|
||||||
|
|
||||||
|
// Build list of Static Device Libraries SDLs specified by -l option
|
||||||
|
llvm::SmallSet<std::string, 16> SDLNames;
|
||||||
|
static const StringRef HostOnlyArchives[] = {
|
||||||
|
"omp", "cudart", "m", "gcc", "gcc_s", "pthread", "hip_hcc"};
|
||||||
|
for (auto SDLName : DriverArgs.getAllArgValues(options::OPT_l)) {
|
||||||
|
if (!HostOnlyArchives->contains(SDLName)) {
|
||||||
|
SDLNames.insert(SDLName);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// The search stops as soon as an SDL file is found. The driver then provides
|
||||||
|
// the full filename of the SDL to the llvm-link or clang-nvlink-wrapper
|
||||||
|
// command. If no SDL is found after searching each LINKPATH with
|
||||||
|
// SEARCH-ORDER, it is possible that an archive file lib<libname>.a exists
|
||||||
|
// and may contain bundled object files.
|
||||||
|
for (auto SDLName : SDLNames) {
|
||||||
|
// This is the only call to SDLSearch
|
||||||
|
if (!SDLSearch(D, DriverArgs, CC1Args, LibraryPaths, SDLName, Arch, Target,
|
||||||
|
isBitCodeSDL, postClangLink)) {
|
||||||
|
GetSDLFromOffloadArchive(*C, D, *T, *JA, *Inputs, DriverArgs, CC1Args,
|
||||||
|
LibraryPaths, SDLName, Arch, Target,
|
||||||
|
isBitCodeSDL, postClangLink);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static llvm::opt::Arg *
|
static llvm::opt::Arg *
|
||||||
getAMDGPUCodeObjectArgument(const Driver &D, const llvm::opt::ArgList &Args) {
|
getAMDGPUCodeObjectArgument(const Driver &D, const llvm::opt::ArgList &Args) {
|
||||||
// The last of -mcode-object-v3, -mno-code-object-v3 and
|
// The last of -mcode-object-v3, -mno-code-object-v3 and
|
||||||
|
|
|
@ -49,6 +49,39 @@ void AddRunTimeLibs(const ToolChain &TC, const Driver &D,
|
||||||
llvm::opt::ArgStringList &CmdArgs,
|
llvm::opt::ArgStringList &CmdArgs,
|
||||||
const llvm::opt::ArgList &Args);
|
const llvm::opt::ArgList &Args);
|
||||||
|
|
||||||
|
void AddStaticDeviceLibsLinking(Compilation &C, const Tool &T,
|
||||||
|
const JobAction &JA,
|
||||||
|
const InputInfoList &Inputs,
|
||||||
|
const llvm::opt::ArgList &DriverArgs,
|
||||||
|
llvm::opt::ArgStringList &CmdArgs,
|
||||||
|
StringRef Arch, StringRef Target,
|
||||||
|
bool isBitCodeSDL, bool postClangLink);
|
||||||
|
void AddStaticDeviceLibsPostLinking(const Driver &D,
|
||||||
|
const llvm::opt::ArgList &DriverArgs,
|
||||||
|
llvm::opt::ArgStringList &CmdArgs,
|
||||||
|
StringRef Arch, StringRef Target,
|
||||||
|
bool isBitCodeSDL, bool postClangLink);
|
||||||
|
void AddStaticDeviceLibs(Compilation *C, const Tool *T, const JobAction *JA,
|
||||||
|
const InputInfoList *Inputs, const Driver &D,
|
||||||
|
const llvm::opt::ArgList &DriverArgs,
|
||||||
|
llvm::opt::ArgStringList &CmdArgs, StringRef Arch,
|
||||||
|
StringRef Target, bool isBitCodeSDL,
|
||||||
|
bool postClangLink);
|
||||||
|
|
||||||
|
bool SDLSearch(const Driver &D, const llvm::opt::ArgList &DriverArgs,
|
||||||
|
llvm::opt::ArgStringList &CmdArgs,
|
||||||
|
SmallVector<std::string, 8> LibraryPaths, std::string Lib,
|
||||||
|
StringRef Arch, StringRef Target, bool isBitCodeSDL,
|
||||||
|
bool postClangLink);
|
||||||
|
|
||||||
|
bool GetSDLFromOffloadArchive(Compilation &C, const Driver &D, const Tool &T,
|
||||||
|
const JobAction &JA, const InputInfoList &Inputs,
|
||||||
|
const llvm::opt::ArgList &DriverArgs,
|
||||||
|
llvm::opt::ArgStringList &CC1Args,
|
||||||
|
SmallVector<std::string, 8> LibraryPaths,
|
||||||
|
StringRef Lib, StringRef Arch, StringRef Target,
|
||||||
|
bool isBitCodeSDL, bool postClangLink);
|
||||||
|
|
||||||
const char *SplitDebugName(const JobAction &JA, const llvm::opt::ArgList &Args,
|
const char *SplitDebugName(const JobAction &JA, const llvm::opt::ArgList &Args,
|
||||||
const InputInfo &Input, const InputInfo &Output);
|
const InputInfo &Input, const InputInfo &Output);
|
||||||
|
|
||||||
|
|
|
@ -610,8 +610,11 @@ void NVPTX::OpenMPLinker::ConstructJob(Compilation &C, const JobAction &JA,
|
||||||
CmdArgs.push_back(CubinF);
|
CmdArgs.push_back(CubinF);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "nvptx", GPUArch,
|
||||||
|
false, false);
|
||||||
|
|
||||||
const char *Exec =
|
const char *Exec =
|
||||||
Args.MakeArgString(getToolChain().GetProgramPath("nvlink"));
|
Args.MakeArgString(getToolChain().GetProgramPath("clang-nvlink-wrapper"));
|
||||||
C.addCommand(std::make_unique<Command>(
|
C.addCommand(std::make_unique<Command>(
|
||||||
JA, *this,
|
JA, *this,
|
||||||
ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8,
|
ResponseFileSupport{ResponseFileSupport::RF_Full, llvm::sys::WEM_UTF8,
|
||||||
|
@ -741,6 +744,8 @@ void CudaToolChain::addClangTargetOptions(
|
||||||
|
|
||||||
addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix,
|
addOpenMPDeviceRTL(getDriver(), DriverArgs, CC1Args, BitcodeSuffix,
|
||||||
getTriple());
|
getTriple());
|
||||||
|
AddStaticDeviceLibsPostLinking(getDriver(), DriverArgs, CC1Args, "nvptx", GpuArch,
|
||||||
|
/* bitcode SDL?*/ true, /* PostClang Link? */ true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Binary file not shown.
|
@ -0,0 +1,81 @@
|
||||||
|
// REQUIRES: clang-driver
|
||||||
|
// REQUIRES: x86-registered-target
|
||||||
|
// REQUIRES: amdgpu-registered-target
|
||||||
|
|
||||||
|
// See the steps to create a fat archive are given at the end of the file.
|
||||||
|
|
||||||
|
// Given a FatArchive, clang-offload-bundler should be called to create a
|
||||||
|
// device specific archive, which should be passed to llvm-link.
|
||||||
|
// RUN: %clang -O2 -### -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 %s -L%S/Inputs/openmp_static_device_link -lFatArchive 2>&1 | FileCheck %s
|
||||||
|
// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-emit-llvm-bc"{{.*}}"-target-cpu" "[[GPU:gfx[0-9]+]]"{{.*}}"-o" "[[HOSTBC:.*.bc]]" "-x" "c++"{{.*}}.cpp
|
||||||
|
// CHECK: clang-offload-bundler" "-unbundle" "-type=a" "-inputs={{.*}}/Inputs/openmp_static_device_link/libFatArchive.a" "-targets=openmp-amdgcn-amd-amdhsa-[[GPU]]" "-outputs=[[DEVICESPECIFICARCHIVE:.*.a]]" "-allow-missing-bundles"
|
||||||
|
// CHECK: llvm-link{{.*}}"[[HOSTBC]]" "[[DEVICESPECIFICARCHIVE]]" "-o" "{{.*}}-[[GPU]]-linked-{{.*}}.bc"
|
||||||
|
// CHECK: ld"{{.*}}" "-L{{.*}}/Inputs/openmp_static_device_link" "{{.*}} "-lFatArchive" "{{.*}}" "-lomp{{.*}}-lomptarget"
|
||||||
|
// expected-no-diagnostics
|
||||||
|
|
||||||
|
#ifndef HEADER
|
||||||
|
#define HEADER
|
||||||
|
|
||||||
|
#define N 10
|
||||||
|
|
||||||
|
#pragma omp declare target
|
||||||
|
// Functions defined in Fat Archive.
|
||||||
|
extern "C" void func_present(float *, float *, unsigned);
|
||||||
|
|
||||||
|
#ifdef MISSING
|
||||||
|
// Function not defined in the fat archive.
|
||||||
|
extern "C" void func_missing(float *, float *, unsigned);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#pragma omp end declare target
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
float in[N], out[N], sum = 0;
|
||||||
|
unsigned i;
|
||||||
|
|
||||||
|
#pragma omp parallel for
|
||||||
|
for (i = 0; i < N; ++i) {
|
||||||
|
in[i] = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
func_present(in, out, N); // Returns out[i] = a[i] * 0
|
||||||
|
|
||||||
|
#ifdef MISSING
|
||||||
|
func_missing(in, out, N); // Should throw an error here
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#pragma omp parallel for reduction(+ \
|
||||||
|
: sum)
|
||||||
|
for (i = 0; i < N; ++i)
|
||||||
|
sum += out[i];
|
||||||
|
|
||||||
|
if (!sum)
|
||||||
|
return 0;
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/***********************************************
|
||||||
|
Steps to create Fat Archive (libFatArchive.a)
|
||||||
|
************************************************
|
||||||
|
***************** File: func_1.c ***************
|
||||||
|
void func_present(float* in, float* out, unsigned n){
|
||||||
|
unsigned i;
|
||||||
|
#pragma omp target teams distribute parallel for map(to: in[0:n]) map(from: out[0:n])
|
||||||
|
for(i=0; i<n; ++i){
|
||||||
|
out[i] = in[i] * 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*************************************************
|
||||||
|
1. Compile source file(s) to generate object file(s)
|
||||||
|
clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 -c func_1.c -o func_1_gfx906.o
|
||||||
|
clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx908 -c func_1.c -o func_1_gfx908.o
|
||||||
|
clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 -c func_2.c -o func_2_gfx906.o
|
||||||
|
clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx908 -c func_2.c -o func_2_gfx908.o
|
||||||
|
clang -O2 -fopenmp -fopenmp-targets=nvptx64 -c func_1.c -o func_1_nvptx.o
|
||||||
|
clang -O2 -fopenmp -fopenmp-targets=nvptx64 -c func_2.c -o func_2_nvptx.o
|
||||||
|
|
||||||
|
2. Create a fat archive by combining all the object file(s)
|
||||||
|
llvm-ar cr libFatArchive.a func_1_gfx906.o func_1_gfx908.o func_2_gfx906.o func_2_gfx908.o func_1_nvptx.o func_2_nvptx.o
|
||||||
|
************************************************/
|
|
@ -0,0 +1,81 @@
|
||||||
|
// REQUIRES: clang-driver
|
||||||
|
// REQUIRES: x86-registered-target
|
||||||
|
// REQUIRES: nvptx-registered-target
|
||||||
|
|
||||||
|
// See the steps to create a fat archive are given at the end of the file.
|
||||||
|
|
||||||
|
// Given a FatArchive, clang-offload-bundler should be called to create a
|
||||||
|
// device specific archive, which should be passed to clang-nvlink-wrapper.
|
||||||
|
// RUN: %clang -O2 -### -fopenmp -fopenmp-targets=nvptx64 %s -L%S/Inputs/openmp_static_device_link -lFatArchive 2>&1 | FileCheck %s
|
||||||
|
// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "nvptx64"{{.*}}"-target-cpu" "[[GPU:sm_[0-9]+]]"{{.*}}"-o" "[[HOSTBC:.*.s]]" "-x" "c++"{{.*}}.cpp
|
||||||
|
// CHECK: clang-offload-bundler" "-unbundle" "-type=a" "-inputs={{.*}}/Inputs/openmp_static_device_link/libFatArchive.a" "-targets=openmp-nvptx64-[[GPU]]" "-outputs=[[DEVICESPECIFICARCHIVE:.*.a]]" "-allow-missing-bundles"
|
||||||
|
// CHECK: clang-nvlink-wrapper{{.*}}"-o" "{{.*}}.out" "-arch" "[[GPU]]" "{{.*}}[[DEVICESPECIFICARCHIVE]]"
|
||||||
|
// CHECK: ld"{{.*}}" "-L{{.*}}/Inputs/openmp_static_device_link" "{{.*}} "-lFatArchive" "{{.*}}" "-lomp{{.*}}-lomptarget"
|
||||||
|
// expected-no-diagnostics
|
||||||
|
|
||||||
|
#ifndef HEADER
|
||||||
|
#define HEADER
|
||||||
|
|
||||||
|
#define N 10
|
||||||
|
|
||||||
|
#pragma omp declare target
|
||||||
|
// Functions defined in Fat Archive.
|
||||||
|
extern "C" void func_present(float *, float *, unsigned);
|
||||||
|
|
||||||
|
#ifdef MISSING
|
||||||
|
// Function not defined in the fat archive.
|
||||||
|
extern "C" void func_missing(float *, float *, unsigned);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#pragma omp end declare target
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
float in[N], out[N], sum = 0;
|
||||||
|
unsigned i;
|
||||||
|
|
||||||
|
#pragma omp parallel for
|
||||||
|
for (i = 0; i < N; ++i) {
|
||||||
|
in[i] = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
func_present(in, out, N); // Returns out[i] = a[i] * 0
|
||||||
|
|
||||||
|
#ifdef MISSING
|
||||||
|
func_missing(in, out, N); // Should throw an error here
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#pragma omp parallel for reduction(+ \
|
||||||
|
: sum)
|
||||||
|
for (i = 0; i < N; ++i)
|
||||||
|
sum += out[i];
|
||||||
|
|
||||||
|
if (!sum)
|
||||||
|
return 0;
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/***********************************************
|
||||||
|
Steps to create Fat Archive (libFatArchive.a)
|
||||||
|
************************************************
|
||||||
|
***************** File: func_1.c ***************
|
||||||
|
void func_present(float* in, float* out, unsigned n){
|
||||||
|
unsigned i;
|
||||||
|
#pragma omp target teams distribute parallel for map(to: in[0:n]) map(from: out[0:n])
|
||||||
|
for(i=0; i<n; ++i){
|
||||||
|
out[i] = in[i] * 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*************************************************
|
||||||
|
1. Compile source file(s) to generate object file(s)
|
||||||
|
clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 -c func_1.c -o func_1_gfx906.o
|
||||||
|
clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx908 -c func_1.c -o func_1_gfx908.o
|
||||||
|
clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx906 -c func_2.c -o func_2_gfx906.o
|
||||||
|
clang -O2 -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx908 -c func_2.c -o func_2_gfx908.o
|
||||||
|
clang -O2 -fopenmp -fopenmp-targets=nvptx64 -c func_1.c -o func_1_nvptx.o
|
||||||
|
clang -O2 -fopenmp -fopenmp-targets=nvptx64 -c func_2.c -o func_2_nvptx.o
|
||||||
|
|
||||||
|
2. Create a fat archive by combining all the object file(s)
|
||||||
|
llvm-ar cr libFatArchive.a func_1_gfx906.o func_1_gfx908.o func_2_gfx906.o func_2_gfx908.o func_1_nvptx.o func_2_nvptx.o
|
||||||
|
************************************************/
|
|
@ -180,6 +180,28 @@ struct OffloadTargetInfo {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static StringRef getDeviceFileExtension(StringRef Device) {
|
||||||
|
if (Device.contains("gfx"))
|
||||||
|
return ".bc";
|
||||||
|
if (Device.contains("sm_"))
|
||||||
|
return ".cubin";
|
||||||
|
|
||||||
|
WithColor::warning() << "Could not determine extension for archive"
|
||||||
|
"members, using \".o\"\n";
|
||||||
|
return ".o";
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::string getDeviceLibraryFileName(StringRef BundleFileName,
|
||||||
|
StringRef Device) {
|
||||||
|
StringRef LibName = sys::path::stem(BundleFileName);
|
||||||
|
StringRef Extension = getDeviceFileExtension(Device);
|
||||||
|
|
||||||
|
std::string Result;
|
||||||
|
Result += LibName;
|
||||||
|
Result += Extension;
|
||||||
|
return Result;
|
||||||
|
}
|
||||||
|
|
||||||
/// Generic file handler interface.
|
/// Generic file handler interface.
|
||||||
class FileHandler {
|
class FileHandler {
|
||||||
public:
|
public:
|
||||||
|
@ -1229,7 +1251,9 @@ static Error UnbundleArchive() {
|
||||||
BundledObjectFileName.assign(BundledObjectFile);
|
BundledObjectFileName.assign(BundledObjectFile);
|
||||||
auto OutputBundleName =
|
auto OutputBundleName =
|
||||||
Twine(llvm::sys::path::stem(BundledObjectFileName) + "-" +
|
Twine(llvm::sys::path::stem(BundledObjectFileName) + "-" +
|
||||||
CodeObject)
|
CodeObject +
|
||||||
|
getDeviceLibraryFileName(BundledObjectFileName,
|
||||||
|
CodeObjectInfo.GPUArch))
|
||||||
.str();
|
.str();
|
||||||
// Replace ':' in optional target feature list with '_' to ensure
|
// Replace ':' in optional target feature list with '_' to ensure
|
||||||
// cross-platform validity.
|
// cross-platform validity.
|
||||||
|
|
Loading…
Reference in New Issue