[OpenMP] Add function for setting LIBOMPTARGET_INFO at runtime

Summary:
This patch adds a new runtime function __tgt_set_info_flag that allows the
user to set the information level at runtime without using the environment
variable. Using this will require an extern function, but will eventually be
added into an auxilliary library for OpenMP support functions.

This patch required moving the current InfoLevel to a global variable which must
be instantiated by each plugin.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D100774
This commit is contained in:
Joseph Huber 2021-04-21 17:31:09 -04:00 committed by Huber, Joseph
parent ae209aa9ec
commit 2b6f20082e
15 changed files with 83 additions and 14 deletions

View File

@ -189,6 +189,24 @@ shows that ``D`` will be copied back from the device once the OpenMP device
kernel region ends even though it isn't written to. Finally, at the end of the
OpenMP data region the entries for ``X`` and ``Y`` are removed from the table.
The information level can be controlled at runtime using an internal
libomptarget library call ``__tgt_set_info_flag``. This allows for different
levels of information to be enabled or disabled for certain regions of code.
Using this requires declaring the function signature as an external function so
it can be linked with the runtime library.
.. code-block:: c++
extern "C" void __tgt_set_info_flag(uint32_t);
extern foo();
int main() {
__tgt_set_info_flag(0x10);
#pragma omp target
foo();
}
.. _libopenmptarget_errors:
Errors:

View File

@ -37,6 +37,7 @@
#ifndef _OMPTARGET_DEBUG_H
#define _OMPTARGET_DEBUG_H
#include <atomic>
#include <mutex>
/// 32-Bit field data attributes controlling information presented to the user.
@ -64,16 +65,18 @@ enum OpenMPInfoType : uint32_t {
#define USED
#endif
// Interface to the InfoLevel variable defined by each library.
extern std::atomic<uint32_t> InfoLevel;
// Add __attribute__((used)) to work around a bug in gcc 5/6.
USED static inline uint32_t getInfoLevel() {
static uint32_t InfoLevel = 0;
static std::once_flag Flag{};
std::call_once(Flag, []() {
if (char *EnvStr = getenv("LIBOMPTARGET_INFO"))
InfoLevel = std::stoi(EnvStr);
InfoLevel.store(std::stoi(EnvStr));
});
return InfoLevel;
return InfoLevel.load();
}
// Add __attribute__((used)) to work around a bug in gcc 5/6.

View File

@ -331,6 +331,8 @@ void __kmpc_push_target_tripcount(int64_t device_id, uint64_t loop_tripcount);
void __kmpc_push_target_tripcount_mapper(ident_t *loc, int64_t device_id,
uint64_t loop_tripcount);
void __tgt_set_info_flag(uint32_t);
#ifdef __cplusplus
}
#endif

View File

@ -139,6 +139,9 @@ int32_t __tgt_rtl_run_target_team_region_async(
// error code.
int32_t __tgt_rtl_synchronize(int32_t ID, __tgt_async_info *AsyncInfo);
// Set plugin's internal information flag externally.
void __tgt_rtl_set_info_flag(uint32_t);
#ifdef __cplusplus
}
#endif

View File

@ -1966,3 +1966,6 @@ int32_t __tgt_rtl_synchronize(int32_t device_id, __tgt_async_info *AsyncInfo) {
}
return OFFLOAD_SUCCESS;
}
// AMDGPU plugin's internal InfoLevel.
std::atomic<uint32_t> InfoLevel;

View File

@ -1251,6 +1251,13 @@ int32_t __tgt_rtl_synchronize(int32_t device_id,
return DeviceRTL.synchronize(device_id, async_info_ptr);
}
void __tgt_rtl_set_info_flag(uint32_t NewInfoLevel) {
InfoLevel.store(NewInfoLevel);
}
#ifdef __cplusplus
}
#endif
// Cuda plugin's internal InfoLevel.
std::atomic<uint32_t> InfoLevel;

View File

@ -22,6 +22,7 @@ VERS1.0 {
__tgt_rtl_register_lib;
__tgt_rtl_unregister_lib;
__tgt_rtl_supports_empty_images;
__tgt_rtl_set_info_flag;
local:
*;
};

View File

@ -335,3 +335,6 @@ int32_t __tgt_rtl_run_target_region(int32_t device_id, void *tgt_entry_ptr,
#ifdef __cplusplus
}
#endif
// Elf-64 plugin's internal InfoLevel.
std::atomic<uint32_t> InfoLevel;

View File

@ -173,3 +173,6 @@ int32_t __tgt_rtl_run_target_team_region_async(
#ifdef __cplusplus
}
#endif
// Remote Offloading interal InfoLevel.
std::atomic<uint32_t> InfoLevel;

View File

@ -453,3 +453,6 @@ int32_t __tgt_rtl_run_target_region(int32_t ID, void *Entry, void **Args,
}
int32_t __tgt_rtl_supports_empty_images() { return 1; }
// VEC plugin's internal InfoLevel.
std::atomic<uint32_t> InfoLevel;

View File

@ -39,6 +39,7 @@ VERS1.0 {
llvm_omp_target_alloc_host;
llvm_omp_target_alloc_shared;
llvm_omp_target_alloc_device;
__tgt_set_info_flag;
local:
*;
};

View File

@ -457,3 +457,14 @@ EXTERN void __kmpc_push_target_tripcount_mapper(ident_t *loc, int64_t device_id,
loop_tripcount);
PM->TblMapMtx.unlock();
}
EXTERN void __tgt_set_info_flag(uint32_t NewInfoLevel) {
InfoLevel.store(NewInfoLevel);
for (auto &R : PM->RTLs.AllRTLs) {
if (R.set_info_flag)
R.set_info_flag(NewInfoLevel);
}
}
// Libomptarget's InfoLevel storage.
std::atomic<uint32_t> InfoLevel;

View File

@ -175,6 +175,8 @@ void RTLsTy::LoadRTLs() {
dlsym(dynlib_handle, "__tgt_rtl_unregister_lib");
*((void **)&R.supports_empty_images) =
dlsym(dynlib_handle, "__tgt_rtl_supports_empty_images");
*((void **)&R.set_info_flag) =
dlsym(dynlib_handle, "__tgt_rtl_set_info_flag");
}
DP("RTLs loaded!\n");

View File

@ -55,6 +55,7 @@ struct RTLInfoTy {
typedef int64_t(synchronize_ty)(int32_t, __tgt_async_info *);
typedef int32_t (*register_lib_ty)(__tgt_bin_desc *);
typedef int32_t(supports_empty_images_ty)();
typedef void(set_info_flag_ty)(uint32_t);
int32_t Idx = -1; // RTL index, index is the number of devices
// of other RTLs that were registered before,
@ -91,6 +92,7 @@ struct RTLInfoTy {
register_lib_ty register_lib = nullptr;
register_lib_ty unregister_lib = nullptr;
supports_empty_images_ty *supports_empty_images = nullptr;
set_info_flag_ty *set_info_flag = nullptr;
// Are there images associated with this RTL.
bool isUsed = false;

View File

@ -5,6 +5,8 @@
#define N 64
extern void __tgt_set_info_flag(unsigned);
int main() {
int A[N];
int B[N];
@ -12,27 +14,27 @@ int main() {
int val = 1;
// INFO: CUDA device 0 info: Device supports up to {{.*}} CUDA blocks and {{.*}} threads with a warp size of {{.*}}
// INFO: Libomptarget device 0 info: Entering OpenMP data region at info.c:39:1 with 3 arguments:
// INFO: Libomptarget device 0 info: Entering OpenMP data region at info.c:{{[0-9]+}}:1 with 3 arguments:
// INFO: Libomptarget device 0 info: alloc(A[0:64])[256]
// INFO: Libomptarget device 0 info: tofrom(B[0:64])[256]
// INFO: Libomptarget device 0 info: to(C[0:64])[256]
// INFO: Libomptarget device 0 info: Creating new map entry with HstPtrBegin={{.*}}, TgtPtrBegin={{.*}}, Size=256, Name=A[0:64]
// INFO: Libomptarget device 0 info: Creating new map entry with HstPtrBegin={{.*}}, TgtPtrBegin={{.*}}, Size=256, Name=B[0:64]
// INFO: Libomptarget device 0 info: Creating new map entry with HstPtrBegin={{.*}}, TgtPtrBegin={{.*}}, Size=256, Name=C[0:64]
// INFO: Libomptarget device 0 info: OpenMP Host-Device pointer mappings after block at info.c:39:1:
// INFO: Libomptarget device 0 info: OpenMP Host-Device pointer mappings after block at info.c:{{[0-9]+}}:1:
// INFO: Libomptarget device 0 info: Host Ptr Target Ptr Size (B) RefCount Declaration
// INFO: Libomptarget device 0 info: {{.*}} {{.*}} 256 1 C[0:64] at info.c:11:7
// INFO: Libomptarget device 0 info: {{.*}} {{.*}} 256 1 B[0:64] at info.c:10:7
// INFO: Libomptarget device 0 info: {{.*}} {{.*}} 256 1 A[0:64] at info.c:9:7
// INFO: Libomptarget device 0 info: Entering OpenMP kernel at info.c:40:1 with 1 arguments:
// INFO: Libomptarget device 0 info: {{.*}} {{.*}} 256 1 C[0:64] at info.c:{{[0-9]+}}:7
// INFO: Libomptarget device 0 info: {{.*}} {{.*}} 256 1 B[0:64] at info.c:{{[0-9]+}}:7
// INFO: Libomptarget device 0 info: {{.*}} {{.*}} 256 1 A[0:64] at info.c:{{[0-9]+}}:7
// INFO: Libomptarget device 0 info: Entering OpenMP kernel at info.c:{{[0-9]+}}:1 with 1 arguments:
// INFO: Libomptarget device 0 info: firstprivate(val)[4]
// INFO: CUDA device 0 info: Launching kernel {{.*}} with {{.*}} and {{.*}} threads in {{.*}} mode
// INFO: Libomptarget device 0 info: OpenMP Host-Device pointer mappings after block at info.c:40:1:
// INFO: Libomptarget device 0 info: OpenMP Host-Device pointer mappings after block at info.c:{{[0-9]+}}:1:
// INFO: Libomptarget device 0 info: Host Ptr Target Ptr Size (B) RefCount Declaration
// INFO: Libomptarget device 0 info: 0x{{.*}} 0x{{.*}} 256 1 C[0:64] at info.c:11:7
// INFO: Libomptarget device 0 info: 0x{{.*}} 0x{{.*}} 256 1 B[0:64] at info.c:10:7
// INFO: Libomptarget device 0 info: 0x{{.*}} 0x{{.*}} 256 1 A[0:64] at info.c:9:7
// INFO: Libomptarget device 0 info: Exiting OpenMP data region at info.c:39:1
// INFO: Libomptarget device 0 info: 0x{{.*}} 0x{{.*}} 256 1 C[0:64] at info.c:{{[0-9]+}}:7
// INFO: Libomptarget device 0 info: 0x{{.*}} 0x{{.*}} 256 1 B[0:64] at info.c:{{[0-9]+}}:7
// INFO: Libomptarget device 0 info: 0x{{.*}} 0x{{.*}} 256 1 A[0:64] at info.c:{{[0-9]+}}:7
// INFO: Libomptarget device 0 info: Exiting OpenMP data region at info.c:{{[0-9]+}}:1
// INFO: Libomptarget device 0 info: Removing map entry with HstPtrBegin={{.*}}, TgtPtrBegin={{.*}}, Size=256, Name=C[0:64]
// INFO: Libomptarget device 0 info: Removing map entry with HstPtrBegin={{.*}}, TgtPtrBegin={{.*}}, Size=256, Name=B[0:64]
// INFO: Libomptarget device 0 info: Removing map entry with HstPtrBegin={{.*}}, TgtPtrBegin={{.*}}, Size=256, Name=A[0:64]
@ -40,5 +42,10 @@ int main() {
#pragma omp target firstprivate(val)
{ val = 1; }
__tgt_set_info_flag(0x0);
// INFO-NOT: Libomptarget device 0 info: {{.*}}
#pragma omp target
{ }
return 0;
}