[OpenMP][libomptarget] Fix union.
Summary: To make the two parts of the union have the same size, the size of vect needs to be increased by 16 bits.
Reviewers: grokos, carlo.bertolli, caomhin, ABataev
Reviewed By: grokos, ABataev
Subscribers: fedor.sergeev, guansong, openmp-commits
Differential Revision: https://reviews.llvm.org/D44254
llvm-svn: 327040
This commit is contained in:
parent
31051f8314
commit
d5e5992f9a
|
@ -107,27 +107,27 @@ public:
|
|||
// methods for flags
|
||||
INLINE omp_sched_t GetRuntimeSched();
|
||||
INLINE void SetRuntimeSched(omp_sched_t sched);
|
||||
INLINE int IsDynamic() { return data.items.flags & TaskDescr_IsDynamic; }
|
||||
INLINE int IsDynamic() { return items.flags & TaskDescr_IsDynamic; }
|
||||
INLINE void SetDynamic() {
|
||||
data.items.flags = data.items.flags | TaskDescr_IsDynamic;
|
||||
items.flags = items.flags | TaskDescr_IsDynamic;
|
||||
}
|
||||
INLINE void ClearDynamic() {
|
||||
data.items.flags = data.items.flags & (~TaskDescr_IsDynamic);
|
||||
items.flags = items.flags & (~TaskDescr_IsDynamic);
|
||||
}
|
||||
INLINE int InParallelRegion() { return data.items.flags & TaskDescr_InPar; }
|
||||
INLINE int InParallelRegion() { return items.flags & TaskDescr_InPar; }
|
||||
INLINE int InL2OrHigherParallelRegion() {
|
||||
return data.items.flags & TaskDescr_InParL2P;
|
||||
return items.flags & TaskDescr_InParL2P;
|
||||
}
|
||||
INLINE int IsParallelConstruct() {
|
||||
return data.items.flags & TaskDescr_IsParConstr;
|
||||
return items.flags & TaskDescr_IsParConstr;
|
||||
}
|
||||
INLINE int IsTaskConstruct() { return !IsParallelConstruct(); }
|
||||
// methods for other fields
|
||||
INLINE uint16_t &NThreads() { return data.items.nthreads; }
|
||||
INLINE uint16_t &ThreadLimit() { return data.items.threadlimit; }
|
||||
INLINE uint16_t &ThreadId() { return data.items.threadId; }
|
||||
INLINE uint16_t &ThreadsInTeam() { return data.items.threadsInTeam; }
|
||||
INLINE uint64_t &RuntimeChunkSize() { return data.items.runtimeChunkSize; }
|
||||
INLINE uint16_t &NThreads() { return items.nthreads; }
|
||||
INLINE uint16_t &ThreadLimit() { return items.threadlimit; }
|
||||
INLINE uint16_t &ThreadId() { return items.threadId; }
|
||||
INLINE uint16_t &ThreadsInTeam() { return items.threadsInTeam; }
|
||||
INLINE uint64_t &RuntimeChunkSize() { return items.runtimeChunkSize; }
|
||||
INLINE omptarget_nvptx_TaskDescr *GetPrevTaskDescr() { return prev; }
|
||||
INLINE void SetPrevTaskDescr(omptarget_nvptx_TaskDescr *taskDescr) {
|
||||
prev = taskDescr;
|
||||
|
@ -160,8 +160,6 @@ private:
|
|||
static const uint8_t TaskDescr_IsParConstr = 0x20;
|
||||
static const uint8_t TaskDescr_InParL2P = 0x40;
|
||||
|
||||
union { // both have same size
|
||||
uint64_t vect[2];
|
||||
struct TaskDescr_items {
|
||||
uint8_t flags; // 6 bit used (see flag above)
|
||||
uint8_t unused;
|
||||
|
@ -171,7 +169,6 @@ private:
|
|||
uint16_t threadsInTeam; // threads in current team
|
||||
uint64_t runtimeChunkSize; // runtime chunk size
|
||||
} items;
|
||||
} data;
|
||||
omptarget_nvptx_TaskDescr *prev;
|
||||
};
|
||||
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
|
||||
INLINE omp_sched_t omptarget_nvptx_TaskDescr::GetRuntimeSched() {
|
||||
// sched starts from 1..4; encode it as 0..3; so add 1 here
|
||||
uint8_t rc = (data.items.flags & TaskDescr_SchedMask) + 1;
|
||||
uint8_t rc = (items.flags & TaskDescr_SchedMask) + 1;
|
||||
return (omp_sched_t)rc;
|
||||
}
|
||||
|
||||
|
@ -26,9 +26,9 @@ INLINE void omptarget_nvptx_TaskDescr::SetRuntimeSched(omp_sched_t sched) {
|
|||
// sched starts from 1..4; encode it as 0..3; so sub 1 here
|
||||
uint8_t val = ((uint8_t)sched) - 1;
|
||||
// clear current sched
|
||||
data.items.flags &= ~TaskDescr_SchedMask;
|
||||
items.flags &= ~TaskDescr_SchedMask;
|
||||
// set new sched
|
||||
data.items.flags |= val;
|
||||
items.flags |= val;
|
||||
}
|
||||
|
||||
INLINE void omptarget_nvptx_TaskDescr::InitLevelZeroTaskDescr() {
|
||||
|
@ -38,12 +38,12 @@ INLINE void omptarget_nvptx_TaskDescr::InitLevelZeroTaskDescr() {
|
|||
// dyn is off (unused now anyway, but may need to sample from host ?)
|
||||
// not in parallel
|
||||
|
||||
data.items.flags = 0;
|
||||
data.items.nthreads = GetNumberOfProcsInTeam();
|
||||
items.flags = 0;
|
||||
items.nthreads = GetNumberOfProcsInTeam();
|
||||
; // threads: whatever was alloc by kernel
|
||||
data.items.threadId = 0; // is master
|
||||
data.items.threadsInTeam = 1; // sequential
|
||||
data.items.runtimeChunkSize = 1; // preferred chunking: static with chunk 1
|
||||
items.threadId = 0; // is master
|
||||
items.threadsInTeam = 1; // sequential
|
||||
items.runtimeChunkSize = 1; // preferred chunking: static with chunk 1
|
||||
}
|
||||
|
||||
// This is called when all threads are started together in SPMD mode.
|
||||
|
@ -56,20 +56,19 @@ INLINE void omptarget_nvptx_TaskDescr::InitLevelOneTaskDescr(
|
|||
// dyn is off (unused now anyway, but may need to sample from host ?)
|
||||
// in L1 parallel
|
||||
|
||||
data.items.flags =
|
||||
items.flags =
|
||||
TaskDescr_InPar | TaskDescr_IsParConstr; // set flag to parallel
|
||||
data.items.nthreads = 0; // # threads for subsequent parallel region
|
||||
data.items.threadId =
|
||||
items.nthreads = 0; // # threads for subsequent parallel region
|
||||
items.threadId =
|
||||
GetThreadIdInBlock(); // get ids from cuda (only called for 1st level)
|
||||
data.items.threadsInTeam = tnum;
|
||||
data.items.runtimeChunkSize = 1; // preferred chunking: static with chunk 1
|
||||
items.threadsInTeam = tnum;
|
||||
items.runtimeChunkSize = 1; // preferred chunking: static with chunk 1
|
||||
prev = parentTaskDescr;
|
||||
}
|
||||
|
||||
INLINE void omptarget_nvptx_TaskDescr::CopyData(
|
||||
omptarget_nvptx_TaskDescr *sourceTaskDescr) {
|
||||
data.vect[0] = sourceTaskDescr->data.vect[0];
|
||||
data.vect[1] = sourceTaskDescr->data.vect[1];
|
||||
items = sourceTaskDescr->items;
|
||||
}
|
||||
|
||||
INLINE void
|
||||
|
@ -87,7 +86,7 @@ INLINE void omptarget_nvptx_TaskDescr::CopyParent(
|
|||
INLINE void omptarget_nvptx_TaskDescr::CopyForExplicitTask(
|
||||
omptarget_nvptx_TaskDescr *parentTaskDescr) {
|
||||
CopyParent(parentTaskDescr);
|
||||
data.items.flags = data.items.flags & ~TaskDescr_IsParConstr;
|
||||
items.flags = items.flags & ~TaskDescr_IsParConstr;
|
||||
ASSERT0(LT_FUSSY, IsTaskConstruct(), "expected task");
|
||||
}
|
||||
|
||||
|
@ -95,9 +94,9 @@ INLINE void omptarget_nvptx_TaskDescr::CopyToWorkDescr(
|
|||
omptarget_nvptx_TaskDescr *masterTaskDescr, uint16_t tnum) {
|
||||
CopyParent(masterTaskDescr);
|
||||
// overwrite specific items
|
||||
data.items.flags |=
|
||||
items.flags |=
|
||||
TaskDescr_InPar | TaskDescr_IsParConstr; // set flag to parallel
|
||||
data.items.threadsInTeam = tnum; // set number of threads
|
||||
items.threadsInTeam = tnum; // set number of threads
|
||||
}
|
||||
|
||||
INLINE void omptarget_nvptx_TaskDescr::CopyFromWorkDescr(
|
||||
|
@ -114,16 +113,16 @@ INLINE void omptarget_nvptx_TaskDescr::CopyFromWorkDescr(
|
|||
// never enters this region. When a parallel region is executed serially,
|
||||
// the threadId is set to 0 elsewhere and the kmpc_serialized_* functions
|
||||
// are called, which never activate this region.
|
||||
data.items.threadId =
|
||||
items.threadId =
|
||||
GetThreadIdInBlock(); // get ids from cuda (only called for 1st level)
|
||||
}
|
||||
|
||||
INLINE void omptarget_nvptx_TaskDescr::CopyConvergentParent(
|
||||
omptarget_nvptx_TaskDescr *parentTaskDescr, uint16_t tid, uint16_t tnum) {
|
||||
CopyParent(parentTaskDescr);
|
||||
data.items.flags |= TaskDescr_InParL2P; // In L2+ parallelism
|
||||
data.items.threadsInTeam = tnum; // set number of threads
|
||||
data.items.threadId = tid;
|
||||
items.flags |= TaskDescr_InParL2P; // In L2+ parallelism
|
||||
items.threadsInTeam = tnum; // set number of threads
|
||||
items.threadId = tid;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
|
Loading…
Reference in New Issue