Revert "[libomptarget] Build DeviceRTL for amdgpu"

- More tests are failing on CI than failed locally when this patch was written.

This reverts commit 33427fdb7b.
This commit is contained in:
Jon Chesterfield 2021-10-28 01:01:53 +01:00
parent cf37a94c1e
commit 6c7b203d1d
22 changed files with 15 additions and 91 deletions

View File

@ -252,7 +252,7 @@ void AMDGPUOpenMPToolChain::addClangTargetOptions(
std::string BitcodeSuffix;
if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime,
options::OPT_fno_openmp_target_new_runtime, false))
BitcodeSuffix = "new-amdgpu-" + GPUArch;
BitcodeSuffix = "new-amdgcn-" + GPUArch;
else
BitcodeSuffix = "amdgcn-" + GPUArch;

View File

@ -226,5 +226,6 @@ foreach(sm ${nvptx_sm_list})
endforeach()
foreach(mcpu ${amdgpu_mcpus})
compileDeviceRTLLibrary(${mcpu} amdgpu -target amdgcn-amd-amdhsa -D__AMDGCN__ -fvisibility=default -nogpulib)
# Requires D112227 or similar to enable compilation for amdgpu.
# compileDeviceRTLLibrary(${mcpu} amdgpu -target amdgcn-amd-amdhsa -D__AMDGCN__ -fvisibility=default -nogpulib)
endforeach()

View File

@ -20,9 +20,9 @@ using namespace _OMP;
#pragma omp declare target
extern uint32_t __omp_rtl_debug_kind; // defined by CGOpenMPRuntimeGPU
extern uint32_t __omp_rtl_debug_kind;
// TODO: We want to change the name as soon as the old runtime is gone.
// TODO: We want to change the name as soon as the old runtime is gone.
DeviceEnvironmentTy CONSTANT(omptarget_device_environment)
__attribute__((used));

View File

@ -68,23 +68,8 @@ uint64_t atomicAdd(uint64_t *Address, uint64_t Val, int Ordering) {
///{
#pragma omp begin declare variant match(device = {arch(amdgcn)})
uint32_t atomicInc(uint32_t *A, uint32_t V, int Ordering) {
// builtin_amdgcn_atomic_inc32 should expand to this switch when
// passed a runtime value, but does not do so yet. Workaround here.
switch (Ordering) {
default:
__builtin_unreachable();
case __ATOMIC_RELAXED:
return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_RELAXED, "");
case __ATOMIC_ACQUIRE:
return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_ACQUIRE, "");
case __ATOMIC_RELEASE:
return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_RELEASE, "");
case __ATOMIC_ACQ_REL:
return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_ACQ_REL, "");
case __ATOMIC_SEQ_CST:
return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_SEQ_CST, "");
}
uint32_t atomicInc(uint32_t *Address, uint32_t Val, int Ordering) {
return __builtin_amdgcn_atomic_inc32(Address, Val, Ordering, "");
}
uint32_t SHARED(namedBarrierTracker);
@ -141,52 +126,6 @@ void namedBarrier() {
fence::team(__ATOMIC_RELEASE);
}
// sema checking of amdgcn_fence is aggressive. Intention is to patch clang
// so that it is usable within a template environment and so that a runtime
// value of the memory order is expanded to this switch within clang/llvm.
void fenceTeam(int Ordering) {
switch (Ordering) {
default:
__builtin_unreachable();
case __ATOMIC_ACQUIRE:
return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup");
case __ATOMIC_RELEASE:
return __builtin_amdgcn_fence(__ATOMIC_RELEASE, "workgroup");
case __ATOMIC_ACQ_REL:
return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "workgroup");
case __ATOMIC_SEQ_CST:
return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
}
}
void fenceKernel(int Ordering) {
switch (Ordering) {
default:
__builtin_unreachable();
case __ATOMIC_ACQUIRE:
return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");
case __ATOMIC_RELEASE:
return __builtin_amdgcn_fence(__ATOMIC_RELEASE, "agent");
case __ATOMIC_ACQ_REL:
return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "agent");
case __ATOMIC_SEQ_CST:
return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "agent");
}
}
void fenceSystem(int Ordering) {
switch (Ordering) {
default:
__builtin_unreachable();
case __ATOMIC_ACQUIRE:
return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "");
case __ATOMIC_RELEASE:
return __builtin_amdgcn_fence(__ATOMIC_RELEASE, "");
case __ATOMIC_ACQ_REL:
return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "");
case __ATOMIC_SEQ_CST:
return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "");
}
}
void syncWarp(__kmpc_impl_lanemask_t) {
// AMDGCN doesn't need to sync threads in a warp
}
@ -194,12 +133,13 @@ void syncWarp(__kmpc_impl_lanemask_t) {
void syncThreads() { __builtin_amdgcn_s_barrier(); }
void syncThreadsAligned() { syncThreads(); }
// TODO: Don't have wavefront lane locks. Possibly can't have them.
void unsetLock(omp_lock_t *) { __builtin_trap(); }
int testLock(omp_lock_t *) { __builtin_trap(); }
void initLock(omp_lock_t *) { __builtin_trap(); }
void destroyLock(omp_lock_t *) { __builtin_trap(); }
void setLock(omp_lock_t *) { __builtin_trap(); }
void syncThreadsAligned() { syncThreads(); }
void fenceTeam(int Ordering) { __builtin_amdgcn_fence(Ordering, "workgroup"); }
void fenceKernel(int Ordering) { __builtin_amdgcn_fence(Ordering, "agent"); }
void fenceSystem(int Ordering) { __builtin_amdgcn_fence(Ordering, ""); }
#pragma omp end declare variant
///}

View File

@ -122,4 +122,3 @@ endif()
# Report to the parent scope that we are building a plugin for amdgpu
set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS} amdgcn-amd-amdhsa" PARENT_SCOPE)
set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS} amdgcn-amd-amdhsa-newRTL" PARENT_SCOPE)

View File

@ -2,7 +2,6 @@
// amdgcn does not have printf definition
// XFAIL: amdgcn-amd-amdhsa
// XFAIL: amdgcn-amd-amdhsa-newRTL
#include <stdio.h>

View File

@ -2,7 +2,6 @@
// amdgcn does not have printf definition
// XFAIL: amdgcn-amd-amdhsa
// XFAIL: amdgcn-amd-amdhsa-newRTL
#include <cstdio>
#include <cstdlib>

View File

@ -2,7 +2,6 @@
// amdgcn does not have printf definition
// XFAIL: amdgcn-amd-amdhsa
// XFAIL: amdgcn-amd-amdhsa-newRTL
#include <cstdio>
#include <cstdlib>

View File

@ -2,7 +2,6 @@
// fails with error message 'Unable to generate target entries' on amdgcn
// XFAIL: amdgcn-amd-amdhsa
// XFAIL: amdgcn-amd-amdhsa-newRTL
#include <stdio.h>
#include <omp.h>

View File

@ -2,7 +2,6 @@
// amdgcn does not have printf definition
// XFAIL: amdgcn-amd-amdhsa
// XFAIL: amdgcn-amd-amdhsa-newRTL
#include <stdio.h>
#include <stdint.h>

View File

@ -3,7 +3,6 @@
// amdgcn does not have printf definition
// XFAIL: amdgcn-amd-amdhsa
// XFAIL: amdgcn-amd-amdhsa-newRTL
#include <omp.h>
#include <stdio.h>

View File

@ -2,7 +2,6 @@
// amdgcn does not have printf definition
// XFAIL: amdgcn-amd-amdhsa
// XFAIL: amdgcn-amd-amdhsa-newRTL
#include <stdio.h>

View File

@ -2,7 +2,6 @@
// amdgcn does not have printf definition
// UNSUPPORTED: amdgcn-amd-amdhsa
// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
#include <stdio.h>

View File

@ -2,7 +2,6 @@
// Wrong results on amdgcn
// UNSUPPORTED: amdgcn-amd-amdhsa
// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
#include <iostream>

View File

@ -2,7 +2,7 @@
// Currently hangs on amdgpu
// UNSUPPORTED: amdgcn-amd-amdhsa
// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
// UNSUPPORTED: x86_64-pc-linux-gnu
#include <cassert>

View File

@ -1,7 +1,6 @@
// RUN: %libomptarget-compilexx-and-run-generic
// UNSUPPORTED: amdgcn-amd-amdhsa
// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
#include <cassert>
#include <iostream>

View File

@ -2,7 +2,6 @@
// Fails in DAGToDAG on an address space problem
// UNSUPPORTED: amdgcn-amd-amdhsa
// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
#include <cmath>
#include <cstdio>

View File

@ -9,7 +9,6 @@
// amdgcn does not have printf definition
// XFAIL: amdgcn-amd-amdhsa
// XFAIL: amdgcn-amd-amdhsa-newRTL
#include <stdio.h>
#include <omp.h>

View File

@ -4,7 +4,6 @@
// Fails on amdgcn with error: GPU Memory Error
// XFAIL: amdgcn-amd-amdhsa
// XFAIL: amdgcn-amd-amdhsa-newRTL
#include <stdio.h>
#include <omp.h>

View File

@ -5,7 +5,6 @@
// Fails on amdgcn with error: GPU Memory Error
// XFAIL: amdgcn-amd-amdhsa
// XFAIL: amdgcn-amd-amdhsa-newRTL
#include <omp.h>
#include <stdio.h>

View File

@ -5,7 +5,6 @@
// amdgcn does not have printf definition
// XFAIL: amdgcn-amd-amdhsa
// XFAIL: amdgcn-amd-amdhsa-newRTL
#include <omp.h>
#include <stdio.h>

View File

@ -4,7 +4,6 @@
// amdgcn does not have printf definition
// XFAIL: amdgcn-amd-amdhsa
// XFAIL: amdgcn-amd-amdhsa-newRTL
#include <stdio.h>
#include <omp.h>