[libomptarget] Remove duplicate RTLRequiresFlags per device
We have one global RTLs.RequiresFlags; I don't see a need to make a copy per device that the runtime manages. This was problematic anyway because the copy happened during the first __tgt_register_lib(). This made it impossible to call __tgt_register_requires() from normal user functions for testing. Hence, this change also fixes unified_shared_memory/shared_update.c for older versions of Clang that don't call __tgt_register_requires() before __tgt_register_lib(). Differential Revision: https://reviews.llvm.org/D66019 llvm-svn: 368465
This commit is contained in:
parent
4fe911d9dd
commit
7a0f2dc5a4
|
@ -120,7 +120,7 @@ EXTERN int omp_target_is_present(void *ptr, int device_num) {
|
|||
// getTgtPtrBegin() function which means that there is no device
|
||||
// corresponding point for ptr. This function should return false
|
||||
// in that situation.
|
||||
if (Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)
|
||||
if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)
|
||||
rc = !IsHostPtr;
|
||||
DP("Call to omp_target_is_present returns %d\n", rc);
|
||||
return rc;
|
||||
|
|
|
@ -194,7 +194,7 @@ void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
|
|||
// maps are respected.
|
||||
// TODO: In addition to the mapping rules above, when the close map
|
||||
// modifier is implemented, foce the mapping of the variable to the device.
|
||||
if (RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
|
||||
if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
|
||||
DP("Return HstPtrBegin " DPxMOD " Size=%ld RefCount=%s\n",
|
||||
DPxPTR((uintptr_t)HstPtrBegin), Size, (UpdateRefCount ? " updated" : ""));
|
||||
IsHostPtr = true;
|
||||
|
@ -241,7 +241,7 @@ void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
|
|||
(CONSIDERED_INF(HT.RefCount)) ? "INF" :
|
||||
std::to_string(HT.RefCount).c_str());
|
||||
rc = (void *)tp;
|
||||
} else if (RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
|
||||
} else if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
|
||||
// If the value isn't found in the mapping and unified shared memory
|
||||
// is on then it means we have stumbled upon a value which we need to
|
||||
// use directly from the host.
|
||||
|
@ -270,7 +270,7 @@ void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size) {
|
|||
}
|
||||
|
||||
int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete) {
|
||||
if (RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)
|
||||
if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)
|
||||
return OFFLOAD_SUCCESS;
|
||||
// Check if the pointer is contained in any sub-nodes.
|
||||
int rc;
|
||||
|
@ -305,7 +305,7 @@ int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete) {
|
|||
void DeviceTy::init() {
|
||||
// Make call to init_requires if it exists for this plugin.
|
||||
if (RTL->init_requires)
|
||||
RTL->init_requires(RTLRequiresFlags);
|
||||
RTL->init_requires(RTLs.RequiresFlags);
|
||||
int32_t rc = RTL->init_device(RTLDeviceID);
|
||||
if (rc == OFFLOAD_SUCCESS) {
|
||||
IsInit = true;
|
||||
|
|
|
@ -100,13 +100,10 @@ struct DeviceTy {
|
|||
// moved into the target task in libomp.
|
||||
std::map<int32_t, uint64_t> LoopTripCnt;
|
||||
|
||||
int64_t RTLRequiresFlags;
|
||||
|
||||
DeviceTy(RTLInfoTy *RTL)
|
||||
: DeviceID(-1), RTL(RTL), RTLDeviceID(-1), IsInit(false), InitFlag(),
|
||||
HasPendingGlobals(false), HostDataToTargetMap(),
|
||||
PendingCtorsDtors(), ShadowPtrMap(), DataMapMtx(), PendingGlobalsMtx(),
|
||||
ShadowMtx(), RTLRequiresFlags(0) {}
|
||||
HasPendingGlobals(false), HostDataToTargetMap(), PendingCtorsDtors(),
|
||||
ShadowPtrMap(), DataMapMtx(), PendingGlobalsMtx(), ShadowMtx() {}
|
||||
|
||||
// The existence of mutexes makes DeviceTy non-copyable. We need to
|
||||
// provide a copy constructor and an assignment operator explicitly.
|
||||
|
@ -115,9 +112,8 @@ struct DeviceTy {
|
|||
IsInit(d.IsInit), InitFlag(), HasPendingGlobals(d.HasPendingGlobals),
|
||||
HostDataToTargetMap(d.HostDataToTargetMap),
|
||||
PendingCtorsDtors(d.PendingCtorsDtors), ShadowPtrMap(d.ShadowPtrMap),
|
||||
DataMapMtx(), PendingGlobalsMtx(),
|
||||
ShadowMtx(), LoopTripCnt(d.LoopTripCnt),
|
||||
RTLRequiresFlags(d.RTLRequiresFlags) {}
|
||||
DataMapMtx(), PendingGlobalsMtx(), ShadowMtx(),
|
||||
LoopTripCnt(d.LoopTripCnt) {}
|
||||
|
||||
DeviceTy& operator=(const DeviceTy &d) {
|
||||
DeviceID = d.DeviceID;
|
||||
|
@ -129,7 +125,6 @@ struct DeviceTy {
|
|||
PendingCtorsDtors = d.PendingCtorsDtors;
|
||||
ShadowPtrMap = d.ShadowPtrMap;
|
||||
LoopTripCnt = d.LoopTripCnt;
|
||||
RTLRequiresFlags = d.RTLRequiresFlags;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
|
|
@ -290,7 +290,7 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num,
|
|||
|
||||
if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
|
||||
bool copy = false;
|
||||
if (!(Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)) {
|
||||
if (!(RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)) {
|
||||
if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) {
|
||||
copy = true;
|
||||
} else if (arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) {
|
||||
|
@ -390,7 +390,7 @@ int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
|
|||
if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
|
||||
bool Always = arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS;
|
||||
bool CopyMember = false;
|
||||
if (!(Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)) {
|
||||
if (!(RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)) {
|
||||
if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
|
||||
!(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
|
||||
// Copy data only if the "parent" struct has RefCount==1.
|
||||
|
@ -404,7 +404,7 @@ int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
|
|||
}
|
||||
|
||||
if ((DelEntry || Always || CopyMember) &&
|
||||
!(Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
|
||||
!(RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
|
||||
TgtPtrBegin == HstPtrBegin)) {
|
||||
DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
|
||||
data_size, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
|
||||
|
@ -486,7 +486,7 @@ int target_data_update(DeviceTy &Device, int32_t arg_num,
|
|||
continue;
|
||||
}
|
||||
|
||||
if (Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
|
||||
if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
|
||||
TgtPtrBegin == HstPtrBegin) {
|
||||
DP("hst data:" DPxMOD " unified and shared, becomes a noop\n",
|
||||
DPxPTR(HstPtrBegin));
|
||||
|
@ -669,7 +669,7 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num,
|
|||
DPxPTR(HstPtrVal));
|
||||
continue;
|
||||
}
|
||||
if (Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
|
||||
if (RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
|
||||
TgtPtrBegin == HstPtrBegin) {
|
||||
DP("Unified memory is active, no need to map lambda captured"
|
||||
"variable (" DPxMOD ")\n", DPxPTR(HstPtrVal));
|
||||
|
|
|
@ -266,8 +266,6 @@ void RTLsTy::RegisterLib(__tgt_bin_desc *desc) {
|
|||
Devices[start + device_id].DeviceID = start + device_id;
|
||||
// RTL local device ID
|
||||
Devices[start + device_id].RTLDeviceID = device_id;
|
||||
// RTL requires flags
|
||||
Devices[start + device_id].RTLRequiresFlags = RequiresFlags;
|
||||
}
|
||||
|
||||
// Initialize the index of this RTL and save it in the used RTLs.
|
||||
|
|
Loading…
Reference in New Issue