From d664315ae896174b49b1feadea75a26dcfd209f2 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 24 Aug 2017 07:55:15 +0000 Subject: [PATCH] IPRA: Don't assume called function is first call operand Fixes not finding the called global for AMDGPU call pseudoinstructions, which prevented IPRA from doing much. llvm-svn: 311637 --- llvm/lib/CodeGen/RegUsageInfoPropagate.cpp | 28 +++++--- llvm/test/CodeGen/AMDGPU/ipra.ll | 84 +++++++++++++++++++++- 2 files changed, 103 insertions(+), 9 deletions(-) diff --git a/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp b/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp index a39c9457b3a4..f6d45067816a 100644 --- a/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp +++ b/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp @@ -88,6 +88,19 @@ void RegUsageInfoPropagationPass::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } +// Assumes call instructions have a single reference to a function. +static const Function *findCalledFunction(const Module &M, MachineInstr &MI) { + for (MachineOperand &MO : MI.operands()) { + if (MO.isGlobal()) + return dyn_cast<Function>(MO.getGlobal()); + + if (MO.isSymbol()) + return M.getFunction(MO.getSymbolName()); + } + + return nullptr; +} + bool RegUsageInfoPropagationPass::runOnMachineFunction(MachineFunction &MF) { const Module *M = MF.getFunction()->getParent(); PhysicalRegisterUsageInfo *PRUI = &getAnalysis<PhysicalRegisterUsageInfo>(); @@ -118,15 +131,14 @@ bool RegUsageInfoPropagationPass::runOnMachineFunction(MachineFunction &MF) { Changed = true; }; - MachineOperand &Operand = MI.getOperand(0); - if (Operand.isGlobal()) - UpdateRegMask(cast<Function>(Operand.getGlobal())); - else if (Operand.isSymbol()) - UpdateRegMask(M->getFunction(Operand.getSymbolName())); + if (const Function *F = findCalledFunction(*M, MI)) { + UpdateRegMask(F); + } else { + DEBUG(dbgs() << "Failed to find call target function\n"); + } - DEBUG(dbgs() - << "Call Instruction After Register Usage Info Propagation : \n"); - DEBUG(dbgs() << MI << "\n"); + 
DEBUG(dbgs() << "Call Instruction After Register Usage Info Propagation : " + << MI << '\n'); } } diff --git a/llvm/test/CodeGen/AMDGPU/ipra.ll b/llvm/test/CodeGen/AMDGPU/ipra.ll index 53752b24f406..803855cd0327 100644 --- a/llvm/test/CodeGen/AMDGPU/ipra.ll +++ b/llvm/test/CodeGen/AMDGPU/ipra.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -enable-ipra < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -enable-ipra -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN %s ; Kernels are not called, so there is no call preserved mask. ; GCN-LABEL: {{^}}kernel: @@ -9,4 +9,86 @@ entry: ret void } +; GCN-LABEL: {{^}}func: +; GCN: ; NumVgprs: 8 +define void @func() #1 { + call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0 + ret void +} + +; GCN-LABEL: {{^}}kernel_call: +; GCN-NOT: buffer_store +; GCN-NOT: buffer_load +; GCN-NOT: readlane +; GCN-NOT: writelane +; GCN: flat_load_dword v8 +; GCN: s_swappc_b64 +; GCN-NOT: buffer_store +; GCN-NOT: buffer_load +; GCN-NOT: readlane +; GCN-NOT: writelane +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8 + +; GCN: ; NumSgprs: 37 +; GCN: ; NumVgprs: 9 +define amdgpu_kernel void @kernel_call() #0 { + %vgpr = load volatile i32, i32 addrspace(1)* undef + tail call void @func() + store volatile i32 %vgpr, i32 addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}func_regular_call: +; GCN-NOT: buffer_store +; GCN-NOT: buffer_load +; GCN-NOT: readlane +; GCN-NOT: writelane +; GCN: flat_load_dword v8 +; GCN: s_swappc_b64 +; GCN-NOT: buffer_store +; GCN-NOT: buffer_load +; GCN-NOT: readlane +; GCN-NOT: writelane +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8 + +; GCN: ; NumSgprs: 32 +; GCN: ; NumVgprs: 9 +define void @func_regular_call() #1 { + %vgpr = load volatile i32, i32 addrspace(1)* undef + tail call void @func() + store volatile i32 %vgpr, i32 addrspace(1)* undef + ret void +} + +; GCN-LABEL: {{^}}func_tail_call: +; 
GCN: s_waitcnt +; GCN-NEXT: s_getpc_b64 s[6:7] +; GCN-NEXT: s_add_u32 s6, +; GCN-NEXT: s_addc_u32 s7, +; GCN-NEXT: s_setpc_b64 s[6:7] + +; GCN: ; NumSgprs: 32 +; GCN: ; NumVgprs: 8 +define void @func_tail_call() #1 { + tail call void @func() + ret void +} + +; GCN-LABEL: {{^}}func_call_tail_call: +; GCN: flat_load_dword v8 +; GCN: s_swappc_b64 +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8 +; GCN: s_setpc_b64 + +; GCN: ; NumSgprs: 32 +; GCN: ; NumVgprs: 9 +define void @func_call_tail_call() #1 { + %vgpr = load volatile i32, i32 addrspace(1)* undef + tail call void @func() + store volatile i32 %vgpr, i32 addrspace(1)* undef + tail call void @func() + ret void +} + attributes #0 = { nounwind } +attributes #1 = { nounwind noinline }