Revert "[libomptarget] Build DeviceRTL for amdgpu"
- More tests fail on CI than failed locally when this patch was written.
This reverts commit 33427fdb7b.
This commit is contained in:
parent
cf37a94c1e
commit
6c7b203d1d
|
@ -252,7 +252,7 @@ void AMDGPUOpenMPToolChain::addClangTargetOptions(
|
|||
std::string BitcodeSuffix;
|
||||
if (DriverArgs.hasFlag(options::OPT_fopenmp_target_new_runtime,
|
||||
options::OPT_fno_openmp_target_new_runtime, false))
|
||||
BitcodeSuffix = "new-amdgpu-" + GPUArch;
|
||||
BitcodeSuffix = "new-amdgcn-" + GPUArch;
|
||||
else
|
||||
BitcodeSuffix = "amdgcn-" + GPUArch;
|
||||
|
||||
|
|
|
@ -226,5 +226,6 @@ foreach(sm ${nvptx_sm_list})
|
|||
endforeach()
|
||||
|
||||
foreach(mcpu ${amdgpu_mcpus})
|
||||
compileDeviceRTLLibrary(${mcpu} amdgpu -target amdgcn-amd-amdhsa -D__AMDGCN__ -fvisibility=default -nogpulib)
|
||||
# require D112227 or similar to enable the compilation for amdgpu
|
||||
# compileDeviceRTLLibrary(${mcpu} amdgpu -target amdgcn-amd-amdhsa -D__AMDGCN__ -fvisibility=default -nogpulib)
|
||||
endforeach()
|
||||
|
|
|
@ -20,9 +20,9 @@ using namespace _OMP;
|
|||
|
||||
#pragma omp declare target
|
||||
|
||||
extern uint32_t __omp_rtl_debug_kind; // defined by CGOpenMPRuntimeGPU
|
||||
extern uint32_t __omp_rtl_debug_kind;
|
||||
|
||||
// TODO: We want to change the name as soon as the old runtime is gone.
|
||||
// TOOD: We want to change the name as soon as the old runtime is gone.
|
||||
DeviceEnvironmentTy CONSTANT(omptarget_device_environment)
|
||||
__attribute__((used));
|
||||
|
||||
|
|
|
@ -68,23 +68,8 @@ uint64_t atomicAdd(uint64_t *Address, uint64_t Val, int Ordering) {
|
|||
///{
|
||||
#pragma omp begin declare variant match(device = {arch(amdgcn)})
|
||||
|
||||
uint32_t atomicInc(uint32_t *A, uint32_t V, int Ordering) {
|
||||
// builtin_amdgcn_atomic_inc32 should expand to this switch when
|
||||
// passed a runtime value, but does not do so yet. Workaround here.
|
||||
switch (Ordering) {
|
||||
default:
|
||||
__builtin_unreachable();
|
||||
case __ATOMIC_RELAXED:
|
||||
return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_RELAXED, "");
|
||||
case __ATOMIC_ACQUIRE:
|
||||
return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_ACQUIRE, "");
|
||||
case __ATOMIC_RELEASE:
|
||||
return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_RELEASE, "");
|
||||
case __ATOMIC_ACQ_REL:
|
||||
return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_ACQ_REL, "");
|
||||
case __ATOMIC_SEQ_CST:
|
||||
return __builtin_amdgcn_atomic_inc32(A, V, __ATOMIC_SEQ_CST, "");
|
||||
}
|
||||
uint32_t atomicInc(uint32_t *Address, uint32_t Val, int Ordering) {
|
||||
return __builtin_amdgcn_atomic_inc32(Address, Val, Ordering, "");
|
||||
}
|
||||
|
||||
uint32_t SHARED(namedBarrierTracker);
|
||||
|
@ -141,52 +126,6 @@ void namedBarrier() {
|
|||
fence::team(__ATOMIC_RELEASE);
|
||||
}
|
||||
|
||||
// sema checking of amdgcn_fence is aggressive. Intention is to patch clang
|
||||
// so that it is usable within a template environment and so that a runtime
|
||||
// value of the memory order is expanded to this switch within clang/llvm.
|
||||
void fenceTeam(int Ordering) {
|
||||
switch (Ordering) {
|
||||
default:
|
||||
__builtin_unreachable();
|
||||
case __ATOMIC_ACQUIRE:
|
||||
return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup");
|
||||
case __ATOMIC_RELEASE:
|
||||
return __builtin_amdgcn_fence(__ATOMIC_RELEASE, "workgroup");
|
||||
case __ATOMIC_ACQ_REL:
|
||||
return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "workgroup");
|
||||
case __ATOMIC_SEQ_CST:
|
||||
return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
|
||||
}
|
||||
}
|
||||
void fenceKernel(int Ordering) {
|
||||
switch (Ordering) {
|
||||
default:
|
||||
__builtin_unreachable();
|
||||
case __ATOMIC_ACQUIRE:
|
||||
return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");
|
||||
case __ATOMIC_RELEASE:
|
||||
return __builtin_amdgcn_fence(__ATOMIC_RELEASE, "agent");
|
||||
case __ATOMIC_ACQ_REL:
|
||||
return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "agent");
|
||||
case __ATOMIC_SEQ_CST:
|
||||
return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "agent");
|
||||
}
|
||||
}
|
||||
void fenceSystem(int Ordering) {
|
||||
switch (Ordering) {
|
||||
default:
|
||||
__builtin_unreachable();
|
||||
case __ATOMIC_ACQUIRE:
|
||||
return __builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "");
|
||||
case __ATOMIC_RELEASE:
|
||||
return __builtin_amdgcn_fence(__ATOMIC_RELEASE, "");
|
||||
case __ATOMIC_ACQ_REL:
|
||||
return __builtin_amdgcn_fence(__ATOMIC_ACQ_REL, "");
|
||||
case __ATOMIC_SEQ_CST:
|
||||
return __builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "");
|
||||
}
|
||||
}
|
||||
|
||||
void syncWarp(__kmpc_impl_lanemask_t) {
|
||||
// AMDGCN doesn't need to sync threads in a warp
|
||||
}
|
||||
|
@ -194,12 +133,13 @@ void syncWarp(__kmpc_impl_lanemask_t) {
|
|||
void syncThreads() { __builtin_amdgcn_s_barrier(); }
|
||||
void syncThreadsAligned() { syncThreads(); }
|
||||
|
||||
// TODO: Don't have wavefront lane locks. Possibly can't have them.
|
||||
void unsetLock(omp_lock_t *) { __builtin_trap(); }
|
||||
int testLock(omp_lock_t *) { __builtin_trap(); }
|
||||
void initLock(omp_lock_t *) { __builtin_trap(); }
|
||||
void destroyLock(omp_lock_t *) { __builtin_trap(); }
|
||||
void setLock(omp_lock_t *) { __builtin_trap(); }
|
||||
void syncThreadsAligned() { syncThreads(); }
|
||||
|
||||
void fenceTeam(int Ordering) { __builtin_amdgcn_fence(Ordering, "workgroup"); }
|
||||
|
||||
void fenceKernel(int Ordering) { __builtin_amdgcn_fence(Ordering, "agent"); }
|
||||
|
||||
void fenceSystem(int Ordering) { __builtin_amdgcn_fence(Ordering, ""); }
|
||||
|
||||
#pragma omp end declare variant
|
||||
///}
|
||||
|
|
|
@ -122,4 +122,3 @@ endif()
|
|||
|
||||
# Report to the parent scope that we are building a plugin for amdgpu
|
||||
set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS} amdgcn-amd-amdhsa" PARENT_SCOPE)
|
||||
set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS} amdgcn-amd-amdhsa-newRTL" PARENT_SCOPE)
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
|
||||
// amdgcn does not have printf definition
|
||||
// XFAIL: amdgcn-amd-amdhsa
|
||||
// XFAIL: amdgcn-amd-amdhsa-newRTL
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
|
||||
// amdgcn does not have printf definition
|
||||
// XFAIL: amdgcn-amd-amdhsa
|
||||
// XFAIL: amdgcn-amd-amdhsa-newRTL
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
|
||||
// amdgcn does not have printf definition
|
||||
// XFAIL: amdgcn-amd-amdhsa
|
||||
// XFAIL: amdgcn-amd-amdhsa-newRTL
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
|
||||
// fails with error message 'Unable to generate target entries' on amdgcn
|
||||
// XFAIL: amdgcn-amd-amdhsa
|
||||
// XFAIL: amdgcn-amd-amdhsa-newRTL
|
||||
|
||||
#include <stdio.h>
|
||||
#include <omp.h>
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
|
||||
// amdgcn does not have printf definition
|
||||
// XFAIL: amdgcn-amd-amdhsa
|
||||
// XFAIL: amdgcn-amd-amdhsa-newRTL
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
|
||||
// amdgcn does not have printf definition
|
||||
// XFAIL: amdgcn-amd-amdhsa
|
||||
// XFAIL: amdgcn-amd-amdhsa-newRTL
|
||||
|
||||
#include <omp.h>
|
||||
#include <stdio.h>
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
|
||||
// amdgcn does not have printf definition
|
||||
// XFAIL: amdgcn-amd-amdhsa
|
||||
// XFAIL: amdgcn-amd-amdhsa-newRTL
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
|
||||
// amdgcn does not have printf definition
|
||||
// UNSUPPORTED: amdgcn-amd-amdhsa
|
||||
// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
|
||||
// Wrong results on amdgcn
|
||||
// UNSUPPORTED: amdgcn-amd-amdhsa
|
||||
// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
|
||||
|
||||
#include <iostream>
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
// Currently hangs on amdgpu
|
||||
// UNSUPPORTED: amdgcn-amd-amdhsa
|
||||
// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
|
||||
|
||||
// UNSUPPORTED: x86_64-pc-linux-gnu
|
||||
|
||||
#include <cassert>
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
// RUN: %libomptarget-compilexx-and-run-generic
|
||||
|
||||
// UNSUPPORTED: amdgcn-amd-amdhsa
|
||||
// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
|
||||
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
|
||||
// Fails in DAGToDAG on an address space problem
|
||||
// UNSUPPORTED: amdgcn-amd-amdhsa
|
||||
// UNSUPPORTED: amdgcn-amd-amdhsa-newRTL
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
|
|
|
@ -9,7 +9,6 @@
|
|||
|
||||
// amdgcn does not have printf definition
|
||||
// XFAIL: amdgcn-amd-amdhsa
|
||||
// XFAIL: amdgcn-amd-amdhsa-newRTL
|
||||
|
||||
#include <stdio.h>
|
||||
#include <omp.h>
|
||||
|
|
|
@ -4,7 +4,6 @@
|
|||
|
||||
// Fails on amdgcn with error: GPU Memory Error
|
||||
// XFAIL: amdgcn-amd-amdhsa
|
||||
// XFAIL: amdgcn-amd-amdhsa-newRTL
|
||||
|
||||
#include <stdio.h>
|
||||
#include <omp.h>
|
||||
|
|
|
@ -5,7 +5,6 @@
|
|||
|
||||
// Fails on amdgcn with error: GPU Memory Error
|
||||
// XFAIL: amdgcn-amd-amdhsa
|
||||
// XFAIL: amdgcn-amd-amdhsa-newRTL
|
||||
|
||||
#include <omp.h>
|
||||
#include <stdio.h>
|
||||
|
|
|
@ -5,7 +5,6 @@
|
|||
|
||||
// amdgcn does not have printf definition
|
||||
// XFAIL: amdgcn-amd-amdhsa
|
||||
// XFAIL: amdgcn-amd-amdhsa-newRTL
|
||||
|
||||
#include <omp.h>
|
||||
#include <stdio.h>
|
||||
|
|
|
@ -4,7 +4,6 @@
|
|||
|
||||
// amdgcn does not have printf definition
|
||||
// XFAIL: amdgcn-amd-amdhsa
|
||||
// XFAIL: amdgcn-amd-amdhsa-newRTL
|
||||
|
||||
#include <stdio.h>
|
||||
#include <omp.h>
|
||||
|
|
Loading…
Reference in New Issue