From ed7ec860f03caf6b702f27a74c3682d061f60e1d Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Tue, 27 Jul 2021 12:54:04 -0500 Subject: [PATCH] [OpenMP] Improve alignment handling in the new device runtime --- openmp/libomptarget/DeviceRTL/include/Utils.h | 9 +++++++++ openmp/libomptarget/DeviceRTL/src/State.cpp | 8 ++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/openmp/libomptarget/DeviceRTL/include/Utils.h b/openmp/libomptarget/DeviceRTL/include/Utils.h index 912c40781612..dc4b1cd71a59 100644 --- a/openmp/libomptarget/DeviceRTL/include/Utils.h +++ b/openmp/libomptarget/DeviceRTL/include/Utils.h @@ -63,6 +63,15 @@ inline uint32_t popc(uint64_t V) { return __builtin_popcountl(V); } +/// Return \p V aligned "upwards" according to \p Align. +template <typename Ty1, typename Ty2> inline Ty1 align_up(Ty1 V, Ty2 Align) { + return ((V + Ty1(Align) - 1) / Ty1(Align)) * Ty1(Align); +} +/// Return \p V aligned "downwards" according to \p Align. +template <typename Ty1, typename Ty2> inline Ty1 align_down(Ty1 V, Ty2 Align) { + return V - V % Align; +} + #define OMP_LIKELY(EXPR) __builtin_expect((bool)(EXPR), true) #define OMP_UNLIKELY(EXPR) __builtin_expect((bool)(EXPR), false) diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp index dae262a04082..2e00a6ecb02f 100644 --- a/openmp/libomptarget/DeviceRTL/src/State.cpp +++ b/openmp/libomptarget/DeviceRTL/src/State.cpp @@ -85,8 +85,8 @@ private: /// Compute the size of the storage space reserved for a thread. 
uint32_t computeThreadStorageTotal() { uint32_t NumLanesInBlock = mapping::getNumberOfProcessorElements(); - return (state::SharedScratchpadSize - NumLanesInBlock + 1) / - NumLanesInBlock; + return utils::align_down((state::SharedScratchpadSize / NumLanesInBlock), + Alignment); } /// Return the top address of the warp data stack, that is the first address @@ -114,7 +114,7 @@ void SharedMemorySmartStackTy::init(bool IsSPMD) { void *SharedMemorySmartStackTy::push(uint64_t Bytes) { // First align the number of requested bytes. - uint64_t AlignedBytes = (Bytes + (Alignment - 1)) / Alignment * Alignment; + uint64_t AlignedBytes = utils::align_up(Bytes, Alignment); uint32_t StorageTotal = computeThreadStorageTotal(); @@ -136,7 +136,7 @@ void *SharedMemorySmartStackTy::push(uint64_t Bytes) { } void SharedMemorySmartStackTy::pop(void *Ptr, uint32_t Bytes) { - uint64_t AlignedBytes = (Bytes + (Alignment - 1)) / Alignment * Alignment; + uint64_t AlignedBytes = utils::align_up(Bytes, Alignment); if (Ptr >= &Data[0] && Ptr < &Data[state::SharedScratchpadSize]) { int TId = mapping::getThreadIdInBlock(); Usage[TId] -= AlignedBytes;