[OpenMP][libomptarget] Fix union.

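Summary:
To make the two parts of the union have the same size, the size of vect needs to be increased by 16 bits.
(Note: in the diff below, the union and its vect member are removed outright, and CopyData copies the items struct by direct assignment, rather than enlarging vect.)

Reviewers: grokos, carlo.bertolli, caomhin, ABataev

Reviewed By: grokos, ABataev

Subscribers: fedor.sergeev, guansong, openmp-commits

Differential Revision: https://reviews.llvm.org/D44254

llvm-svn: 327040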
2018-03-08 18:44:02 +00:00 · 2018-03-08 18:44:02 +00:00 · d5e5992f9a
parent 31051f8314
commit d5e5992f9a
2 changed files with 41 additions and 45 deletions
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
@ -107,27 +107,27 @@ public:
  // methods for flags
  INLINE omp_sched_t GetRuntimeSched();
  INLINE void SetRuntimeSched(omp_sched_t sched);
-  INLINE int IsDynamic() { return data.items.flags & TaskDescr_IsDynamic; }
+  INLINE int IsDynamic() { return items.flags & TaskDescr_IsDynamic; }
  INLINE void SetDynamic() {
-    data.items.flags = data.items.flags | TaskDescr_IsDynamic;
+    items.flags = items.flags | TaskDescr_IsDynamic;
  }
  INLINE void ClearDynamic() {
-    data.items.flags = data.items.flags & (~TaskDescr_IsDynamic);
+    items.flags = items.flags & (~TaskDescr_IsDynamic);
  }
-  INLINE int InParallelRegion() { return data.items.flags & TaskDescr_InPar; }
+  INLINE int InParallelRegion() { return items.flags & TaskDescr_InPar; }
  INLINE int InL2OrHigherParallelRegion() {
-    return data.items.flags & TaskDescr_InParL2P;
+    return items.flags & TaskDescr_InParL2P;
  }
  INLINE int IsParallelConstruct() {
-    return data.items.flags & TaskDescr_IsParConstr;
+    return items.flags & TaskDescr_IsParConstr;
  }
  INLINE int IsTaskConstruct() { return !IsParallelConstruct(); }
  // methods for other fields
-  INLINE uint16_t &NThreads() { return data.items.nthreads; }
-  INLINE uint16_t &ThreadLimit() { return data.items.threadlimit; }
-  INLINE uint16_t &ThreadId() { return data.items.threadId; }
-  INLINE uint16_t &ThreadsInTeam() { return data.items.threadsInTeam; }
-  INLINE uint64_t &RuntimeChunkSize() { return data.items.runtimeChunkSize; }
+  INLINE uint16_t &NThreads() { return items.nthreads; }
+  INLINE uint16_t &ThreadLimit() { return items.threadlimit; }
+  INLINE uint16_t &ThreadId() { return items.threadId; }
+  INLINE uint16_t &ThreadsInTeam() { return items.threadsInTeam; }
+  INLINE uint64_t &RuntimeChunkSize() { return items.runtimeChunkSize; }
  INLINE omptarget_nvptx_TaskDescr *GetPrevTaskDescr() { return prev; }
  INLINE void SetPrevTaskDescr(omptarget_nvptx_TaskDescr *taskDescr) {
    prev = taskDescr;
@ -160,8 +160,6 @@ private:
  static const uint8_t TaskDescr_IsParConstr = 0x20;
  static const uint8_t TaskDescr_InParL2P = 0x40;

-  union { // both have same size
-    uint64_t vect[2];
  struct TaskDescr_items {
    uint8_t flags; // 6 bit used (see flag above)
    uint8_t unused;
@ -171,7 +169,6 @@ private:
    uint16_t threadsInTeam;    // threads in current team
    uint64_t runtimeChunkSize; // runtime chunk size
  } items;
-  } data;
  omptarget_nvptx_TaskDescr *prev;
 };

--- a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptxi.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptxi.h
@ -18,7 +18,7 @@

 INLINE omp_sched_t omptarget_nvptx_TaskDescr::GetRuntimeSched() {
  // sched starts from 1..4; encode it as 0..3; so add 1 here
-  uint8_t rc = (data.items.flags & TaskDescr_SchedMask) + 1;
+  uint8_t rc = (items.flags & TaskDescr_SchedMask) + 1;
  return (omp_sched_t)rc;
 }

@ -26,9 +26,9 @@ INLINE void omptarget_nvptx_TaskDescr::SetRuntimeSched(omp_sched_t sched) {
  // sched starts from 1..4; encode it as 0..3; so sub 1 here
  uint8_t val = ((uint8_t)sched) - 1;
  // clear current sched
-  data.items.flags &= ~TaskDescr_SchedMask;
+  items.flags &= ~TaskDescr_SchedMask;
  // set new sched
-  data.items.flags |= val;
+  items.flags |= val;
 }

 INLINE void omptarget_nvptx_TaskDescr::InitLevelZeroTaskDescr() {
@ -38,12 +38,12 @@ INLINE void omptarget_nvptx_TaskDescr::InitLevelZeroTaskDescr() {
  //   dyn is off (unused now anyway, but may need to sample from host ?)
  //   not in parallel

-  data.items.flags = 0;
-  data.items.nthreads = GetNumberOfProcsInTeam();
+  items.flags = 0;
+  items.nthreads = GetNumberOfProcsInTeam();
  ;                                // threads: whatever was alloc by kernel
-  data.items.threadId = 0;         // is master
-  data.items.threadsInTeam = 1;    // sequential
-  data.items.runtimeChunkSize = 1; // prefered chunking statik with chunk 1
+  items.threadId = 0;         // is master
+  items.threadsInTeam = 1;    // sequential
+  items.runtimeChunkSize = 1; // prefered chunking statik with chunk 1
 }

 // This is called when all threads are started together in SPMD mode.
@ -56,20 +56,19 @@ INLINE void omptarget_nvptx_TaskDescr::InitLevelOneTaskDescr(
  //   dyn is off (unused now anyway, but may need to sample from host ?)
  //   in L1 parallel

-  data.items.flags =
+  items.flags =
      TaskDescr_InPar | TaskDescr_IsParConstr; // set flag to parallel
-  data.items.nthreads = 0; // # threads for subsequent parallel region
-  data.items.threadId =
+  items.nthreads = 0; // # threads for subsequent parallel region
+  items.threadId =
      GetThreadIdInBlock(); // get ids from cuda (only called for 1st level)
-  data.items.threadsInTeam = tnum;
-  data.items.runtimeChunkSize = 1; // prefered chunking statik with chunk 1
+  items.threadsInTeam = tnum;
+  items.runtimeChunkSize = 1; // prefered chunking statik with chunk 1
  prev = parentTaskDescr;
 }

 INLINE void omptarget_nvptx_TaskDescr::CopyData(
    omptarget_nvptx_TaskDescr *sourceTaskDescr) {
-  data.vect[0] = sourceTaskDescr->data.vect[0];
-  data.vect[1] = sourceTaskDescr->data.vect[1];
+  items = sourceTaskDescr->items;
 }

 INLINE void
@ -87,7 +86,7 @@ INLINE void omptarget_nvptx_TaskDescr::CopyParent(
 INLINE void omptarget_nvptx_TaskDescr::CopyForExplicitTask(
    omptarget_nvptx_TaskDescr *parentTaskDescr) {
  CopyParent(parentTaskDescr);
-  data.items.flags = data.items.flags & ~TaskDescr_IsParConstr;
+  items.flags = items.flags & ~TaskDescr_IsParConstr;
  ASSERT0(LT_FUSSY, IsTaskConstruct(), "expected task");
 }

@ -95,9 +94,9 @@ INLINE void omptarget_nvptx_TaskDescr::CopyToWorkDescr(
    omptarget_nvptx_TaskDescr *masterTaskDescr, uint16_t tnum) {
  CopyParent(masterTaskDescr);
  // overrwrite specific items;
-  data.items.flags |=
+  items.flags |=
      TaskDescr_InPar | TaskDescr_IsParConstr; // set flag to parallel
-  data.items.threadsInTeam = tnum;             // set number of threads
+  items.threadsInTeam = tnum;             // set number of threads
 }

 INLINE void omptarget_nvptx_TaskDescr::CopyFromWorkDescr(
@ -114,16 +113,16 @@ INLINE void omptarget_nvptx_TaskDescr::CopyFromWorkDescr(
  // never enters this region.  When a parallel region is executed serially,
  // the threadId is set to 0 elsewhere and the kmpc_serialized_* functions
  // are called, which never activate this region.
-  data.items.threadId =
+  items.threadId =
      GetThreadIdInBlock(); // get ids from cuda (only called for 1st level)
 }

 INLINE void omptarget_nvptx_TaskDescr::CopyConvergentParent(
    omptarget_nvptx_TaskDescr *parentTaskDescr, uint16_t tid, uint16_t tnum) {
  CopyParent(parentTaskDescr);
-  data.items.flags |= TaskDescr_InParL2P; // In L2+ parallelism
-  data.items.threadsInTeam = tnum;        // set number of threads
-  data.items.threadId = tid;
+  items.flags |= TaskDescr_InParL2P; // In L2+ parallelism
+  items.threadsInTeam = tnum;        // set number of threads
+  items.threadId = tid;
 }

 ////////////////////////////////////////////////////////////////////////////////