RegAlloc: Fix verifier error with undef identity copies

The code did not match the example in its comment: it was checking the
undef flag on the copy's destination operand instead of its source operand.
The existing tests were only hitting the "> 2 operands" case.

llvm-svn: 361156
This commit is contained in:
Matt Arsenault 2019-05-20 14:09:36 +00:00
parent 3e1821bf43
commit 7c8ec18964
2 changed files with 70 additions and 1 deletions

View File

@@ -384,7 +384,7 @@ void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const {
// give us additional liveness information: The target (super-)register
// must not be valid before this point. Replace the COPY with a KILL
// instruction to maintain this information.
-if (MI.getOperand(0).isUndef() || MI.getNumOperands() > 2) {
+if (MI.getOperand(1).isUndef() || MI.getNumOperands() > 2) {
MI.setDesc(TII->get(TargetOpcode::KILL));
LLVM_DEBUG(dbgs() << " replace by: " << MI);
return;

View File

@@ -0,0 +1,69 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -start-before=greedy -stop-after=virtregrewriter -verify-machineinstrs -o - %s | FileCheck %s
# The undef source %4 of the copy is allocated to $vgpr3, which turns the
# copy into an identity copy. The identity copy was then deleted, so $vgpr3
# was considered undef. The code that replaces an undef identity copy with
# a KILL was incorrectly checking the dest operand rather than the source.
--- |
define amdgpu_kernel void @undef_identity_copy() {
ret void
}
declare hidden float @bar(<4 x float>)
declare hidden void @foo()
...
---
name: undef_identity_copy
tracksRegLiveness: true
frameInfo:
maxAlignment: 4
hasCalls: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
scratchWaveOffsetReg: '$sgpr95'
frameOffsetReg: '$sgpr95'
stackPtrOffsetReg: '$sgpr32'
body: |
bb.0:
; CHECK-LABEL: name: undef_identity_copy
; CHECK: renamable $vgpr32_vgpr33_vgpr34_vgpr35 = FLAT_LOAD_DWORDX4 undef renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, addrspace 1)
; CHECK: renamable $sgpr6_sgpr7 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 4, implicit-def dead $scc
; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
; CHECK: $sgpr4 = COPY $sgpr95
; CHECK: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @foo, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4
; CHECK: ADJCALLSTACKDOWN 0, 4, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
; CHECK: renamable $sgpr6_sgpr7 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @bar + 4, target-flags(amdgpu-rel32-hi) @bar + 4, implicit-def dead $scc
; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
; CHECK: $sgpr4 = COPY $sgpr95
; CHECK: $vgpr0 = COPY renamable $vgpr32
; CHECK: $vgpr1 = COPY renamable $vgpr33
; CHECK: $vgpr2 = COPY renamable $vgpr34
; CHECK: $vgpr3 = KILL undef renamable $vgpr3
; CHECK: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @bar, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit killed $vgpr1, implicit killed $vgpr2, implicit killed $vgpr3, implicit-def $vgpr0
; CHECK: ADJCALLSTACKDOWN 0, 4, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
; CHECK: FLAT_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
; CHECK: S_ENDPGM 0
%0:vreg_128 = FLAT_LOAD_DWORDX4 undef %1:vreg_64, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16, addrspace 1)
%2:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @foo + 4, target-flags(amdgpu-rel32-hi) @foo + 4, implicit-def dead $scc
ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
$sgpr4 = COPY $sgpr95
dead $sgpr30_sgpr31 = SI_CALL %2, @foo, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4
ADJCALLSTACKDOWN 0, 4, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
%3:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @bar + 4, target-flags(amdgpu-rel32-hi) @bar + 4, implicit-def dead $scc
ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
$sgpr4 = COPY $sgpr95
$vgpr0 = COPY %0.sub0
$vgpr1 = COPY %0.sub1
$vgpr2 = COPY %0.sub2
$vgpr3 = COPY undef %4:vgpr_32
dead $sgpr30_sgpr31 = SI_CALL %3, @bar, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0, implicit killed $vgpr1, implicit killed $vgpr2, implicit killed $vgpr3, implicit-def $vgpr0
%5:vgpr_32 = COPY $vgpr0
ADJCALLSTACKDOWN 0, 4, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr95
FLAT_STORE_DWORD undef %6:vreg_64, %5, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store 4, addrspace 1)
S_ENDPGM 0
...