AMDGPU: Remove error on calls for amdgcn
Repurpose the -amdgpu-function-calls flag. Rather than requiring it in order to emit a call, use it only to control whether the always-inline path is run. llvm-svn: 310003
This commit is contained in:
parent
817c253e60
commit
a202538bfa
|
@ -290,11 +290,6 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
|
|||
|
||||
AMDGPUTargetMachine::~AMDGPUTargetMachine() = default;
|
||||
|
||||
bool AMDGPUTargetMachine::enableFunctionCalls() const {
|
||||
return EnableAMDGPUFunctionCalls &&
|
||||
getTargetTriple().getArch() == Triple::amdgcn;
|
||||
}
|
||||
|
||||
StringRef AMDGPUTargetMachine::getGPUName(const Function &F) const {
|
||||
Attribute GPUAttr = F.getFnAttribute("target-cpu");
|
||||
return GPUAttr.hasAttribute(Attribute::None) ?
|
||||
|
@ -503,8 +498,10 @@ class GCNPassConfig final : public AMDGPUPassConfig {
|
|||
public:
|
||||
GCNPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
|
||||
: AMDGPUPassConfig(TM, PM) {
|
||||
// It is necessary to know the register usage of the entire call graph.
|
||||
setRequiresCodeGenSCCOrder(EnableAMDGPUFunctionCalls);
|
||||
// It is necessary to know the register usage of the entire call graph. We
|
||||
// allow calls without EnableAMDGPUFunctionCalls if they are marked
|
||||
// noinline, so this is always required.
|
||||
setRequiresCodeGenSCCOrder(true);
|
||||
}
|
||||
|
||||
GCNTargetMachine &getGCNTargetMachine() const {
|
||||
|
@ -571,15 +568,18 @@ void AMDGPUPassConfig::addIRPasses() {
|
|||
|
||||
addPass(createAMDGPULowerIntrinsicsPass());
|
||||
|
||||
// Function calls are not supported, so make sure we inline everything.
|
||||
addPass(createAMDGPUAlwaysInlinePass());
|
||||
addPass(createAlwaysInlinerLegacyPass());
|
||||
// We need to add the barrier noop pass, otherwise adding the function
|
||||
// inlining pass will cause all of the PassConfigs passes to be run
|
||||
// one function at a time, which means if we have a module with two
|
||||
// functions, then we will generate code for the first function
|
||||
// without ever running any passes on the second.
|
||||
addPass(createBarrierNoopPass());
|
||||
if (TM.getTargetTriple().getArch() == Triple::r600 ||
|
||||
!EnableAMDGPUFunctionCalls) {
|
||||
// Function calls are not supported, so make sure we inline everything.
|
||||
addPass(createAMDGPUAlwaysInlinePass());
|
||||
addPass(createAlwaysInlinerLegacyPass());
|
||||
// We need to add the barrier noop pass, otherwise adding the function
|
||||
// inlining pass will cause all of the PassConfigs passes to be run
|
||||
// one function at a time, which means if we have a module with two
|
||||
// functions, then we will generate code for the first function
|
||||
// without ever running any passes on the second.
|
||||
addPass(createBarrierNoopPass());
|
||||
}
|
||||
|
||||
if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
|
||||
// TODO: May want to move later or split into an early and late one.
|
||||
|
|
|
@ -69,9 +69,6 @@ public:
|
|||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
LLVM_READONLY
|
||||
bool enableFunctionCalls() const;
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -1962,11 +1962,6 @@ void SITargetLowering::passSpecialInputs(
|
|||
// The wave scratch offset register is used as the global base pointer.
|
||||
SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
|
||||
SmallVectorImpl<SDValue> &InVals) const {
|
||||
const AMDGPUTargetMachine &TM =
|
||||
static_cast<const AMDGPUTargetMachine &>(getTargetMachine());
|
||||
if (!TM.enableFunctionCalls())
|
||||
return AMDGPUTargetLowering::LowerCall(CLI, InVals);
|
||||
|
||||
SelectionDAG &DAG = CLI.DAG;
|
||||
const SDLoc &DL = CLI.DL;
|
||||
SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
define void @void_func_void() #2 {
|
||||
ret void
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
|
||||
|
||||
%struct.ByValStruct = type { [4 x i32] }
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -amdgpu-function-calls -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,MESA %s
|
||||
; RUN: llc -march=amdgcn -mcpu=hawaii -amdgpu-function-calls -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,MESA %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-function-calls -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,VI,MESA %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -amdgpu-function-calls -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,HSA %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,MESA %s
|
||||
; RUN: llc -march=amdgcn -mcpu=hawaii -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,MESA %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,VI,MESA %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,HSA %s
|
||||
|
||||
declare void @external_void_func_i1(i1) #0
|
||||
declare void @external_void_func_i1_signext(i1 signext) #0
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -amdgpu-function-calls -filetype=obj -verify-machineinstrs < %s | llvm-objdump -triple amdgcn--amdhsa -mcpu=fiji -d - | FileCheck -check-prefixes=GCN,VI %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-function-calls -filetype=obj -verify-machineinstrs < %s | llvm-objdump -triple amdgcn--amdhsa -mcpu=gfx900 -d - | FileCheck -check-prefixes=GCN,GFX9 %s
|
||||
; XUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -amdgpu-function-calls -filetype=obj -verify-machineinstrs < %s | llvm-objdump -triple amdgcn--amdhsa -mcpu=hawaii -d - | FileCheck -check-prefixes=GCN,CI %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -filetype=obj -verify-machineinstrs < %s | llvm-objdump -triple amdgcn--amdhsa -mcpu=fiji -d - | FileCheck -check-prefixes=GCN,VI %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -verify-machineinstrs < %s | llvm-objdump -triple amdgcn--amdhsa -mcpu=gfx900 -d - | FileCheck -check-prefixes=GCN,GFX9 %s
|
||||
; XUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -filetype=obj -verify-machineinstrs < %s | llvm-objdump -triple amdgcn--amdhsa -mcpu=hawaii -d - | FileCheck -check-prefixes=GCN,CI %s
|
||||
|
||||
; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GCN-NEXT: s_setpc_b64
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,VI-BUG %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,VI-BUG %s
|
||||
|
||||
; Make sure to run a GPU with the SGPR allocation bug.
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
declare void @external_void_func_void() #0
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
declare void @external_void_func_void() #0
|
||||
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
; RUN: not llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck %s
|
||||
; RUN: not llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s 2>&1 | FileCheck %s
|
||||
; RUN: not llc -march=r600 -mcpu=cypress < %s 2>&1 | FileCheck %s
|
||||
|
||||
; CHECK: in function test_call_external{{.*}}: unsupported call to function external_function
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; RUN: llc -amdgpu-function-calls -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CI %s
|
||||
; RUN: llc -amdgpu-function-calls -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s
|
||||
|
||||
; GCN-LABEL: {{^}}callee_no_stack:
|
||||
; GCN: ; BB#0:
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; RUN: llc -amdgpu-function-calls -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s
|
||||
; RUN: llc -amdgpu-function-calls -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
|
||||
|
||||
; GCN-LABEL: {{^}}use_dispatch_ptr:
|
||||
; GCN: s_load_dword s{{[0-9]+}}, s[6:7], 0x0
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: llc -amdgpu-function-calls -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
|
||||
|
||||
; GCN-LABEL: {{^}}use_workitem_id_x:
|
||||
; GCN: s_waitcnt
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -amdgpu-function-calls -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
|
||||
|
||||
; Test that non-entry function frame indices are expanded properly to
|
||||
; give an index relative to the scratch wave offset register
|
||||
|
@ -144,6 +144,7 @@ define void @func_other_fi_user_non_inline_imm_offset_i32() #0 {
|
|||
store volatile i32 %mul, i32 addrspace(3)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}func_other_fi_user_non_inline_imm_offset_i32_vcc_live:
|
||||
; GCN: s_sub_u32 [[DIFF:s[0-9]+]], s5, s4
|
||||
; GCN-DAG: v_lshr_b32_e64 [[SCALED:v[0-9]+]], [[DIFF]], 6
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,HSA,HSA-NOENV %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa-opencl -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,HSA,HSA-OPENCL %s
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MESA %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,HSA,HSA-NOENV %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa-opencl -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,HSA,HSA-OPENCL %s
|
||||
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,MESA %s
|
||||
|
||||
; GCN-LABEL: {{^}}kernel_implicitarg_ptr_empty:
|
||||
; GCN: enable_sgpr_kernarg_segment_ptr = 1
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
; RUN: not llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR %s
|
||||
; RUN: not llc -march=r600 < %s 2>&1 | FileCheck -check-prefix=ERROR %s
|
||||
; RUN: llc -march=amdgcn < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
declare i32 @memcmp(i8 addrspace(1)* readonly nocapture, i8 addrspace(1)* readonly nocapture, i64) #0
|
||||
declare i8 addrspace(1)* @memchr(i8 addrspace(1)* readonly nocapture, i32, i64) #1
|
||||
|
@ -9,6 +10,9 @@ declare i32 @strcmp(i8* nocapture, i8* nocapture) #1
|
|||
|
||||
|
||||
; ERROR: error: <unknown>:0:0: in function test_memcmp void (i8 addrspace(1)*, i8 addrspace(1)*, i32*): unsupported call to function memcmp
|
||||
|
||||
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, memcmp@rel32@lo+4
|
||||
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, memcmp@rel32@hi+4
|
||||
define amdgpu_kernel void @test_memcmp(i8 addrspace(1)* %x, i8 addrspace(1)* %y, i32* nocapture %p) #0 {
|
||||
entry:
|
||||
%cmp = tail call i32 @memcmp(i8 addrspace(1)* %x, i8 addrspace(1)* %y, i64 2)
|
||||
|
@ -17,6 +21,9 @@ entry:
|
|||
}
|
||||
|
||||
; ERROR: error: <unknown>:0:0: in function test_memchr void (i8 addrspace(1)*, i32, i64): unsupported call to function memchr
|
||||
|
||||
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, memchr@rel32@lo+4
|
||||
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, memchr@rel32@hi+4
|
||||
define amdgpu_kernel void @test_memchr(i8 addrspace(1)* %src, i32 %char, i64 %len) #0 {
|
||||
%res = call i8 addrspace(1)* @memchr(i8 addrspace(1)* %src, i32 %char, i64 %len)
|
||||
store volatile i8 addrspace(1)* %res, i8 addrspace(1)* addrspace(1)* undef
|
||||
|
@ -24,6 +31,9 @@ define amdgpu_kernel void @test_memchr(i8 addrspace(1)* %src, i32 %char, i64 %le
|
|||
}
|
||||
|
||||
; ERROR: error: <unknown>:0:0: in function test_strcpy void (i8*, i8*): unsupported call to function strcpy
|
||||
|
||||
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcpy@rel32@lo+4
|
||||
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcpy@rel32@hi+4
|
||||
define amdgpu_kernel void @test_strcpy(i8* %dst, i8* %src) #0 {
|
||||
%res = call i8* @strcpy(i8* %dst, i8* %src)
|
||||
store volatile i8* %res, i8* addrspace(1)* undef
|
||||
|
@ -31,6 +41,9 @@ define amdgpu_kernel void @test_strcpy(i8* %dst, i8* %src) #0 {
|
|||
}
|
||||
|
||||
; ERROR: error: <unknown>:0:0: in function test_strcmp void (i8*, i8*): unsupported call to function strcmp
|
||||
|
||||
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcmp@rel32@lo+4
|
||||
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strcmp@rel32@hi+4
|
||||
define amdgpu_kernel void @test_strcmp(i8* %src0, i8* %src1) #0 {
|
||||
%res = call i32 @strcmp(i8* %src0, i8* %src1)
|
||||
store volatile i32 %res, i32 addrspace(1)* undef
|
||||
|
@ -38,6 +51,9 @@ define amdgpu_kernel void @test_strcmp(i8* %src0, i8* %src1) #0 {
|
|||
}
|
||||
|
||||
; ERROR: error: <unknown>:0:0: in function test_strlen void (i8*): unsupported call to function strlen
|
||||
|
||||
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strlen@rel32@lo+4
|
||||
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strlen@rel32@hi+4
|
||||
define amdgpu_kernel void @test_strlen(i8* %src) #0 {
|
||||
%res = call i32 @strlen(i8* %src)
|
||||
store volatile i32 %res, i32 addrspace(1)* undef
|
||||
|
@ -45,6 +61,9 @@ define amdgpu_kernel void @test_strlen(i8* %src) #0 {
|
|||
}
|
||||
|
||||
; ERROR: error: <unknown>:0:0: in function test_strnlen void (i8*, i32): unsupported call to function strnlen
|
||||
|
||||
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, strnlen@rel32@lo+4
|
||||
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, strnlen@rel32@hi+4
|
||||
define amdgpu_kernel void @test_strnlen(i8* %src, i32 %size) #0 {
|
||||
%res = call i32 @strnlen(i8* %src, i32 %size)
|
||||
store volatile i32 %res, i32 addrspace(1)* undef
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -amdgpu-sroa=0 -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=hawaii -amdgpu-function-calls -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-sroa=0 -amdgpu-function-calls -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 -check-prefix=VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=hawaii -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=CI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 -check-prefix=VI %s
|
||||
|
||||
; Test calls when called by other callable functions rather than
|
||||
; kernels.
|
||||
|
|
|
@ -1,21 +0,0 @@
|
|||
; RUN: not llc -verify-machineinstrs -march=amdgcn %s -o /dev/null 2>&1 | FileCheck %s
|
||||
; RUN: not llc -verify-machineinstrs -march=amdgcn -mcpu=tonga %s -o /dev/null 2>&1 | FileCheck %s
|
||||
|
||||
; Make sure promote alloca pass doesn't crash
|
||||
|
||||
; CHECK: unsupported call
|
||||
|
||||
declare i32 @foo(i32*) nounwind
|
||||
|
||||
define amdgpu_kernel void @call_private(i32 addrspace(1)* %out, i32 %in) nounwind {
|
||||
entry:
|
||||
%tmp = alloca [2 x i32]
|
||||
%tmp1 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
|
||||
%tmp2 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 1
|
||||
store i32 0, i32* %tmp1
|
||||
store i32 1, i32* %tmp2
|
||||
%tmp3 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 %in
|
||||
%val = call i32 @foo(i32* %tmp3) nounwind
|
||||
store i32 %val, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
|
@ -1,19 +1,21 @@
|
|||
; RUN: not llc -march=amdgcn < %s 2>&1 | FileCheck %s
|
||||
|
||||
; CHECK: LLVM ERROR: indirect calls not handled
|
||||
|
||||
; Make sure that AMDGPUPromoteAlloca doesn't crash if the called
|
||||
; function is a constantexpr cast of a function.
|
||||
|
||||
declare void @foo(float*) #0
|
||||
declare void @foo.varargs(...) #0
|
||||
|
||||
; CHECK: in function crash_call_constexpr_cast{{.*}}: unsupported call to function foo
|
||||
; XCHECK: in function crash_call_constexpr_cast{{.*}}: unsupported call to function foo
|
||||
define amdgpu_kernel void @crash_call_constexpr_cast() #0 {
|
||||
%alloca = alloca i32
|
||||
call void bitcast (void (float*)* @foo to void (i32*)*)(i32* %alloca) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: in function crash_call_constexpr_cast{{.*}}: unsupported call to function foo.varargs
|
||||
; XCHECK: in function crash_call_constexpr_cast{{.*}}: unsupported call to function foo.varargs
|
||||
define amdgpu_kernel void @crash_call_constexpr_cast_varargs() #0 {
|
||||
%alloca = alloca i32
|
||||
call void bitcast (void (...)* @foo.varargs to void (i32*)*)(i32* %alloca) #0
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-promote-alloca < %s | FileCheck -check-prefix=IR %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=ASM %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=ASM %s
|
||||
|
||||
; IR-LABEL: define amdgpu_vs void @promote_alloca_shaders(i32 addrspace(1)* inreg %out, i32 addrspace(1)* inreg %in) #0 {
|
||||
; IR: alloca [5 x i32]
|
||||
|
@ -74,6 +74,26 @@ entry:
|
|||
ret void
|
||||
}
|
||||
|
||||
declare i32 @foo(i32*) #0
|
||||
|
||||
; ASM-LABEL: {{^}}call_private:
|
||||
; ASM: buffer_store_dword
|
||||
; ASM: buffer_store_dword
|
||||
; ASM: s_swappc_b64
|
||||
; ASM: ScratchSize: 16396
|
||||
define amdgpu_kernel void @call_private(i32 addrspace(1)* %out, i32 %in) #0 {
|
||||
entry:
|
||||
%tmp = alloca [2 x i32]
|
||||
%tmp1 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 0
|
||||
%tmp2 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 1
|
||||
store i32 0, i32* %tmp1
|
||||
store i32 1, i32* %tmp2
|
||||
%tmp3 = getelementptr [2 x i32], [2 x i32]* %tmp, i32 0, i32 %in
|
||||
%val = call i32 @foo(i32* %tmp3)
|
||||
store i32 %val, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
||||
attributes #0 = { nounwind "amdgpu-max-work-group-size"="64" }
|
||||
|
|
Loading…
Reference in New Issue