[libomptarget] Remove duplicate RTLRequiresFlags per device

We have one global RTLs.RequiresFlags, I don't see a need to make a
copy per device that the runtime manages. This was problematic anyway
because the copy happened during the first __tgt_register_lib(). This
made it impossible to call __tgt_register_requires() from normal user
funtions for testing.
Hence, this change also fixes unified_shared_memory/shared_update.c for
older versions of Clang that don't call __tgt_register_requires() before
__tgt_register_lib().

Differential Revision: https://reviews.llvm.org/D66019

llvm-svn: 368465
This commit is contained in:
Jonas Hahnfeld 2019-08-09 19:20:39 +00:00
parent 4fe911d9dd
commit 7a0f2dc5a4
5 changed files with 14 additions and 21 deletions

View File

@ -120,7 +120,7 @@ EXTERN int omp_target_is_present(void *ptr, int device_num) {
// getTgtPtrBegin() function which means that there is no device
// corresponding point for ptr. This function should return false
// in that situation.
if (Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)
if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)
rc = !IsHostPtr;
DP("Call to omp_target_is_present returns %d\n", rc);
return rc;

View File

@ -194,7 +194,7 @@ void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
// maps are respected.
// TODO: In addition to the mapping rules above, when the close map
// modifier is implemented, force the mapping of the variable to the device.
if (RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
DP("Return HstPtrBegin " DPxMOD " Size=%ld RefCount=%s\n",
DPxPTR((uintptr_t)HstPtrBegin), Size, (UpdateRefCount ? " updated" : ""));
IsHostPtr = true;
@ -241,7 +241,7 @@ void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
(CONSIDERED_INF(HT.RefCount)) ? "INF" :
std::to_string(HT.RefCount).c_str());
rc = (void *)tp;
} else if (RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
} else if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
// If the value isn't found in the mapping and unified shared memory
// is on then it means we have stumbled upon a value which we need to
// use directly from the host.
@ -270,7 +270,7 @@ void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size) {
}
int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete) {
if (RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)
if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)
return OFFLOAD_SUCCESS;
// Check if the pointer is contained in any sub-nodes.
int rc;
@ -305,7 +305,7 @@ int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete) {
void DeviceTy::init() {
// Make call to init_requires if it exists for this plugin.
if (RTL->init_requires)
RTL->init_requires(RTLRequiresFlags);
RTL->init_requires(RTLs.RequiresFlags);
int32_t rc = RTL->init_device(RTLDeviceID);
if (rc == OFFLOAD_SUCCESS) {
IsInit = true;

View File

@ -100,13 +100,10 @@ struct DeviceTy {
// moved into the target task in libomp.
std::map<int32_t, uint64_t> LoopTripCnt;
int64_t RTLRequiresFlags;
DeviceTy(RTLInfoTy *RTL)
: DeviceID(-1), RTL(RTL), RTLDeviceID(-1), IsInit(false), InitFlag(),
HasPendingGlobals(false), HostDataToTargetMap(),
PendingCtorsDtors(), ShadowPtrMap(), DataMapMtx(), PendingGlobalsMtx(),
ShadowMtx(), RTLRequiresFlags(0) {}
HasPendingGlobals(false), HostDataToTargetMap(), PendingCtorsDtors(),
ShadowPtrMap(), DataMapMtx(), PendingGlobalsMtx(), ShadowMtx() {}
// The existence of mutexes makes DeviceTy non-copyable. We need to
// provide a copy constructor and an assignment operator explicitly.
@ -115,9 +112,8 @@ struct DeviceTy {
IsInit(d.IsInit), InitFlag(), HasPendingGlobals(d.HasPendingGlobals),
HostDataToTargetMap(d.HostDataToTargetMap),
PendingCtorsDtors(d.PendingCtorsDtors), ShadowPtrMap(d.ShadowPtrMap),
DataMapMtx(), PendingGlobalsMtx(),
ShadowMtx(), LoopTripCnt(d.LoopTripCnt),
RTLRequiresFlags(d.RTLRequiresFlags) {}
DataMapMtx(), PendingGlobalsMtx(), ShadowMtx(),
LoopTripCnt(d.LoopTripCnt) {}
DeviceTy& operator=(const DeviceTy &d) {
DeviceID = d.DeviceID;
@ -129,7 +125,6 @@ struct DeviceTy {
PendingCtorsDtors = d.PendingCtorsDtors;
ShadowPtrMap = d.ShadowPtrMap;
LoopTripCnt = d.LoopTripCnt;
RTLRequiresFlags = d.RTLRequiresFlags;
return *this;
}

View File

@ -290,7 +290,7 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num,
if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
bool copy = false;
if (!(Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)) {
if (!(RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)) {
if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) {
copy = true;
} else if (arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) {
@ -390,7 +390,7 @@ int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
bool Always = arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS;
bool CopyMember = false;
if (!(Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)) {
if (!(RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)) {
if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
!(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
// Copy data only if the "parent" struct has RefCount==1.
@ -404,7 +404,7 @@ int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
}
if ((DelEntry || Always || CopyMember) &&
!(Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
!(RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
TgtPtrBegin == HstPtrBegin)) {
DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
data_size, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
@ -486,7 +486,7 @@ int target_data_update(DeviceTy &Device, int32_t arg_num,
continue;
}
if (Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
TgtPtrBegin == HstPtrBegin) {
DP("hst data:" DPxMOD " unified and shared, becomes a noop\n",
DPxPTR(HstPtrBegin));
@ -669,7 +669,7 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num,
DPxPTR(HstPtrVal));
continue;
}
if (Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
TgtPtrBegin == HstPtrBegin) {
DP("Unified memory is active, no need to map lambda captured"
"variable (" DPxMOD ")\n", DPxPTR(HstPtrVal));

View File

@ -266,8 +266,6 @@ void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
Devices[start + device_id].DeviceID = start + device_id;
// RTL local device ID
Devices[start + device_id].RTLDeviceID = device_id;
// RTL requires flags
Devices[start + device_id].RTLRequiresFlags = RequiresFlags;
}
// Initialize the index of this RTL and save it in the used RTLs.