[OpenMP][libomptarget] Enable requires flags for target libraries.

Summary:
Target link variables are currently implemented by creating a copy of the variables on the device side and unified memory never gets exploited.

When the prgram uses the:

```
#pragma omp requires unified_shared_memory
```

directive in conjunction with a declare target link, the linked variable is no longer allocated on the device and the host version is used instead.

This behavior is overridden by performing an explicit mapping.

A Clang side patch is required.

Reviewers: ABataev, AlexEichenberger, grokos, Hahnfeld

Reviewed By: AlexEichenberger, grokos, Hahnfeld

Subscribers: Hahnfeld, jfb, guansong, jdoerfert, openmp-commits

Tags: #openmp

Differential Revision: https://reviews.llvm.org/D60223

llvm-svn: 361294
This commit is contained in:
Gheorghe-Teodor Bercea 2019-05-21 19:35:02 +00:00
parent 44d17ca02e
commit 9e9c918259
8 changed files with 126 additions and 4 deletions

View File

@ -60,6 +60,21 @@ enum OpenMPOffloadingDeclareTargetFlags {
OMP_DECLARE_TARGET_DTOR = 0x04
};
enum OpenMPOffloadingRequiresDirFlags {
/// flag undefined.
OMP_REQ_UNDEFINED = 0x000,
/// no requires directive present.
OMP_REQ_NONE = 0x001,
/// reverse_offload clause.
OMP_REQ_REVERSE_OFFLOAD = 0x002,
/// unified_address clause.
OMP_REQ_UNIFIED_ADDRESS = 0x004,
/// unified_shared_memory clause.
OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
/// dynamic_allocators clause.
OMP_REQ_DYNAMIC_ALLOCATORS = 0x010
};
/// This struct is a record of an entry point or global. For a function
/// entry point the size is expected to be zero
struct __tgt_offload_entry {
@ -113,6 +128,9 @@ int omp_target_associate_ptr(void *host_ptr, void *device_ptr, size_t size,
size_t device_offset, int device_num);
int omp_target_disassociate_ptr(void *host_ptr, int device_num);
/// add the clauses of the requires directives in a given file
void __tgt_register_requires(int64_t flags);
/// adds a target shared library to the target execution image
void __tgt_register_lib(__tgt_bin_desc *desc);

View File

@ -152,7 +152,7 @@ LookupResult DeviceTy::lookupMapping(void *HstPtrBegin, int64_t Size) {
// Used by target_data_begin
// Return the target pointer begin (where the data will be moved).
// Allocate memory if this is the first occurrence if this mapping.
// Allocate memory if this is the first occurrence of this mapping.
// Increment the reference counter.
// If NULL is returned, then either data allocation failed or the user tried
// to do an illegal mapping.

View File

@ -98,11 +98,13 @@ struct DeviceTy {
uint64_t loopTripCnt;
int64_t RTLRequiresFlags;
DeviceTy(RTLInfoTy *RTL)
: DeviceID(-1), RTL(RTL), RTLDeviceID(-1), IsInit(false), InitFlag(),
HasPendingGlobals(false), HostDataToTargetMap(),
PendingCtorsDtors(), ShadowPtrMap(), DataMapMtx(), PendingGlobalsMtx(),
ShadowMtx(), loopTripCnt(0) {}
ShadowMtx(), loopTripCnt(0), RTLRequiresFlags(0) {}
// The existence of mutexes makes DeviceTy non-copyable. We need to
// provide a copy constructor and an assignment operator explicitly.
@ -112,7 +114,8 @@ struct DeviceTy {
HostDataToTargetMap(d.HostDataToTargetMap),
PendingCtorsDtors(d.PendingCtorsDtors), ShadowPtrMap(d.ShadowPtrMap),
DataMapMtx(), PendingGlobalsMtx(),
ShadowMtx(), loopTripCnt(d.loopTripCnt) {}
ShadowMtx(), loopTripCnt(d.loopTripCnt),
RTLRequiresFlags(d.RTLRequiresFlags) {}
DeviceTy& operator=(const DeviceTy &d) {
DeviceID = d.DeviceID;
@ -124,6 +127,7 @@ struct DeviceTy {
PendingCtorsDtors = d.PendingCtorsDtors;
ShadowPtrMap = d.ShadowPtrMap;
loopTripCnt = d.loopTripCnt;
RTLRequiresFlags = d.RTLRequiresFlags;
return *this;
}

View File

@ -1,5 +1,6 @@
VERS1.0 {
global:
__tgt_register_requires;
__tgt_register_lib;
__tgt_unregister_lib;
__tgt_target_data_begin;

View File

@ -57,7 +57,7 @@ static void HandleTargetOutcome(bool success) {
}
break;
case tgt_default:
FATAL_MESSAGE0(1, "default offloading policy must switched to "
FATAL_MESSAGE0(1, "default offloading policy must switched to "
"mandatory or disabled");
break;
case tgt_mandatory:
@ -68,6 +68,12 @@ static void HandleTargetOutcome(bool success) {
}
}
////////////////////////////////////////////////////////////////////////////////
/// adds requires flags
EXTERN void __tgt_register_requires(int64_t flags) {
RTLs.RegisterRequires(flags);
}
////////////////////////////////////////////////////////////////////////////////
/// adds a target shared library to the target execution image
EXTERN void __tgt_register_lib(__tgt_bin_desc *desc) {

View File

@ -186,6 +186,46 @@ static void RegisterGlobalCtorsDtorsForImage(__tgt_bin_desc *desc,
}
}
void RTLsTy::RegisterRequires(int64_t flags) {
// TODO: add more elaborate check.
// Minimal check: only set requires flags if previous value
// is undefined. This ensures that only the first call to this
// function will set the requires flags. All subsequent calls
// will be checked for compatibility.
assert(flags != OMP_REQ_UNDEFINED &&
"illegal undefined flag for requires directive!");
if (RequiresFlags == OMP_REQ_UNDEFINED) {
RequiresFlags = flags;
return;
}
// If multiple compilation units are present enforce
// consistency across all of them for require clauses:
// - reverse_offload
// - unified_address
// - unified_shared_memory
if ((RequiresFlags & OMP_REQ_REVERSE_OFFLOAD) !=
(flags & OMP_REQ_REVERSE_OFFLOAD)) {
FATAL_MESSAGE0(1,
"'#pragma omp requires reverse_offload' not used consistently!");
}
if ((RequiresFlags & OMP_REQ_UNIFIED_ADDRESS) !=
(flags & OMP_REQ_UNIFIED_ADDRESS)) {
FATAL_MESSAGE0(1,
"'#pragma omp requires unified_address' not used consistently!");
}
if ((RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) !=
(flags & OMP_REQ_UNIFIED_SHARED_MEMORY)) {
FATAL_MESSAGE0(1,
"'#pragma omp requires unified_shared_memory' not used consistently!");
}
// TODO: insert any other missing checks
DP("New requires flags %ld compatible with existing %ld!\n",
flags, RequiresFlags);
}
void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
// Attempt to load all plugins available in the system.
std::call_once(initFlag, &RTLsTy::LoadRTLs, this);
@ -222,6 +262,8 @@ void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
Devices[start + device_id].DeviceID = start + device_id;
// RTL local device ID
Devices[start + device_id].RTLDeviceID = device_id;
// RTL requires flags
Devices[start + device_id].RTLRequiresFlags = RequiresFlags;
}
// Initialize the index of this RTL and save it in the used RTLs.

View File

@ -118,8 +118,13 @@ public:
// binaries.
std::vector<RTLInfoTy *> UsedRTLs;
int64_t RequiresFlags;
explicit RTLsTy() {}
// Register the clauses of the requires directive.
void RegisterRequires(int64_t flags);
// Register a shared library with all (compatible) RTLs.
void RegisterLib(__tgt_bin_desc *desc);

View File

@ -0,0 +1,46 @@
// RUN: %libomptarget-compile-aarch64-unknown-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-aarch64-unknown-linux-gnu 2>&1 | %fcheck-aarch64-unknown-linux-gnu -allow-empty -check-prefix=DEBUG
// RUN: %libomptarget-compile-powerpc64-ibm-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-powerpc64-ibm-linux-gnu 2>&1 | %fcheck-powerpc64-ibm-linux-gnu -allow-empty -check-prefix=DEBUG
// RUN: %libomptarget-compile-powerpc64le-ibm-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-powerpc64le-ibm-linux-gnu 2>&1 | %fcheck-powerpc64le-ibm-linux-gnu -allow-empty -check-prefix=DEBUG
// RUN: %libomptarget-compile-x86_64-pc-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-x86_64-pc-linux-gnu 2>&1 | %fcheck-x86_64-pc-linux-gnu -allow-empty -check-prefix=DEBUG
// REQUIRES: libomptarget-debug
/*
Test for the 'requires' clause check.
When a target region is used, the requires flags are set in the
runtime for the entire compilation unit. If the flags are set again,
(for whatever reason) the set must be consistent with previously
set values.
*/
#include <stdio.h>
#include <omp.h>
// ---------------------------------------------------------------------------
// Various definitions copied from OpenMP RTL
extern void __tgt_register_requires(int64_t);
// End of definitions copied from OpenMP RTL.
// ---------------------------------------------------------------------------
void run_reg_requires() {
// Before the target region is registered, the requires registers the status
// of the requires clauses. Since there are no requires clauses in this file
// the flags state can only be OMP_REQ_NONE i.e. 1.
// This is the 2nd time this function is called so it should print the debug
// info belonging to the check.
__tgt_register_requires(1);
__tgt_register_requires(1);
// DEBUG: New requires flags 1 compatible with existing 1!
}
// ---------------------------------------------------------------------------
int main() {
run_reg_requires();
// This also runs reg requires for the first time.
#pragma omp target
{}
return 0;
}