[OpenMP] Improve alignment handling in the new device runtime

This commit is contained in:
Johannes Doerfert 2021-07-27 12:54:04 -05:00
parent cbb709e251
commit ed7ec860f0
2 changed files with 13 additions and 4 deletions

View File

@ -63,6 +63,15 @@ inline uint32_t popc(uint64_t V) {
return __builtin_popcountl(V);
}
/// Round \p V up to a multiple of \p Align and return it as a \p Ty1.
///
/// \p Align is converted to \p Ty1 before it is used. The rounding relies on
/// integer division rather than bit masking, so \p Align does not have to be a
/// power of two. The description holds for non-negative \p V and a positive
/// \p Align.
template <typename Ty1, typename Ty2> inline Ty1 align_up(Ty1 V, Ty2 Align) {
  const Ty1 Step = Ty1(Align);
  // Number of whole steps needed to cover V; adding (Step - 1) before the
  // division turns the truncating divide into a ceiling divide.
  const auto NumSteps = (V + Step - 1) / Step;
  return NumSteps * Step;
}
/// Round \p V down to a multiple of \p Align and return it as a \p Ty1.
///
/// The remainder of \p V divided by \p Align is subtracted from \p V, so
/// \p Align does not have to be a power of two. The description holds for
/// non-negative \p V and a positive \p Align.
template <typename Ty1, typename Ty2> inline Ty1 align_down(Ty1 V, Ty2 Align) {
  // Whatever is left over after dividing by Align is the excess to strip.
  const auto Excess = V % Align;
  return V - Excess;
}
/// Branch hints: convert \p EXPR to bool and pass it to __builtin_expect
/// together with the outcome the caller expects (true for OMP_LIKELY, false
/// for OMP_UNLIKELY). The macros yield the value of the converted expression.
#define OMP_LIKELY(EXPR) __builtin_expect(static_cast<bool>(EXPR), true)
#define OMP_UNLIKELY(EXPR) __builtin_expect(static_cast<bool>(EXPR), false)

View File

@ -85,8 +85,8 @@ private:
/// Compute the size of the storage space reserved for a thread.
///
/// The shared scratchpad (state::SharedScratchpadSize bytes) is divided evenly
/// among the processor elements reported by
/// mapping::getNumberOfProcessorElements(), and the per-element share is then
/// rounded down to a multiple of Alignment.
///
/// The earlier, alignment-unaware computation
///   (SharedScratchpadSize - NumLanesInBlock + 1) / NumLanesInBlock
/// was still present ahead of this return and made it unreachable; it has
/// been removed so the aligned size is the value actually returned.
uint32_t computeThreadStorageTotal() {
  uint32_t NumLanesInBlock = mapping::getNumberOfProcessorElements();
  return utils::align_down((state::SharedScratchpadSize / NumLanesInBlock),
                           Alignment);
}
/// Return the top address of the warp data stack, that is the first address
@ -114,7 +114,7 @@ void SharedMemorySmartStackTy::init(bool IsSPMD) {
void *SharedMemorySmartStackTy::push(uint64_t Bytes) {
// First align the number of requested bytes.
uint64_t AlignedBytes = (Bytes + (Alignment - 1)) / Alignment * Alignment;
uint64_t AlignedBytes = utils::align_up(Bytes, Alignment);
uint32_t StorageTotal = computeThreadStorageTotal();
@ -136,7 +136,7 @@ void *SharedMemorySmartStackTy::push(uint64_t Bytes) {
}
void SharedMemorySmartStackTy::pop(void *Ptr, uint32_t Bytes) {
uint64_t AlignedBytes = (Bytes + (Alignment - 1)) / Alignment * Alignment;
uint64_t AlignedBytes = utils::align_up(Bytes, Alignment);
if (Ptr >= &Data[0] && Ptr < &Data[state::SharedScratchpadSize]) {
int TId = mapping::getThreadIdInBlock();
Usage[TId] -= AlignedBytes;