From 921f7a27cc4e92f0af53c2d454461e41ea3b6f4f Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 25 Jun 2018 16:05:55 +0000 Subject: [PATCH] StackSlotColoring: Decide colors per stack ID I thought I fixed this in r308673, but that fix was very broken. The assumption that any frame index can be used in place of another was more widespread than I realized. Even when stack slot sharing was disabled, this was still replacing frame index uses with a different ID with a different stack slot. Really fix this by doing the coloring per-stack ID, so all of the coloring logically done in a separate namespace. This is a lot simpler than trying to figure out how to change the color if the stack ID is different. llvm-svn: 335488 --- llvm/include/llvm/CodeGen/MachineFrameInfo.h | 3 + llvm/lib/CodeGen/StackSlotColoring.cpp | 72 +++++++++----- .../AMDGPU/sgpr-spill-wrong-stack-id.mir | 97 +++++++++++++++++++ 3 files changed, 150 insertions(+), 22 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h index 91b506c15245..48ee077161df 100644 --- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h +++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h @@ -123,6 +123,9 @@ class MachineFrameInfo { /// necessarily reside in the same contiguous memory block as other stack /// objects. Objects with differing stack IDs should not be merged or /// replaced substituted for each other. + // + /// It is assumed a target uses consecutive, increasing stack IDs starting + /// from 1. uint8_t StackID; /// If this stack object is originated from an Alloca instruction diff --git a/llvm/lib/CodeGen/StackSlotColoring.cpp b/llvm/lib/CodeGen/StackSlotColoring.cpp index b3ffebca6133..eb15b15a24a6 100644 --- a/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -82,14 +82,14 @@ namespace { // AllColors - If index is set, it's a spill slot, i.e. color. // FIXME: This assumes PEI locate spill slot with smaller indices // closest to stack pointer / frame pointer. Therefore, smaller - // index == better color. - BitVector AllColors; + // index == better color. This is per stack ID. + SmallVector AllColors; - // NextColor - Next "color" that's not yet used. - int NextColor = -1; + // NextColor - Next "color" that's not yet used. This is per stack ID. + SmallVector NextColors = { -1 }; - // UsedColors - "Colors" that have been assigned. - BitVector UsedColors; + // UsedColors - "Colors" that have been assigned. This is per stack ID + SmallVector UsedColors; // Assignments - Color to intervals mapping. SmallVector, 16> Assignments; @@ -196,10 +196,15 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) { /// to a sorted (by weight) list. void StackSlotColoring::InitializeSlots() { int LastFI = MFI->getObjectIndexEnd(); + + // There is always at least one stack ID. + AllColors.resize(1); + UsedColors.resize(1); + OrigAlignments.resize(LastFI); OrigSizes.resize(LastFI); - AllColors.resize(LastFI); - UsedColors.resize(LastFI); + AllColors[0].resize(LastFI); + UsedColors[0].resize(LastFI); Assignments.resize(LastFI); using Pair = std::iterator_traits::value_type; @@ -220,18 +225,31 @@ void StackSlotColoring::InitializeSlots() { int FI = TargetRegisterInfo::stackSlot2Index(li.reg); if (MFI->isDeadObjectIndex(FI)) continue; + SSIntervals.push_back(&li); OrigAlignments[FI] = MFI->getObjectAlignment(FI); OrigSizes[FI] = MFI->getObjectSize(FI); - AllColors.set(FI); + + auto StackID = MFI->getStackID(FI); + if (StackID != 0) { + AllColors.resize(StackID + 1); + UsedColors.resize(StackID + 1); + AllColors[StackID].resize(LastFI); + UsedColors[StackID].resize(LastFI); + } + + AllColors[StackID].set(FI); } LLVM_DEBUG(dbgs() << '\n'); // Sort them by weight. std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter()); + NextColors.resize(AllColors.size()); + // Get first "color". - NextColor = AllColors.find_first(); + for (unsigned I = 0, E = AllColors.size(); I != E; ++I) + NextColors[I] = AllColors[I].find_first(); } /// OverlapWithAssignments - Return true if LiveInterval overlaps with any @@ -252,17 +270,19 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) { int Color = -1; bool Share = false; int FI = TargetRegisterInfo::stackSlot2Index(li->reg); + uint8_t StackID = MFI->getStackID(FI); if (!DisableSharing) { + // Check if it's possible to reuse any of the used colors. - Color = UsedColors.find_first(); + Color = UsedColors[StackID].find_first(); while (Color != -1) { if (!OverlapWithAssignments(li, Color)) { Share = true; ++NumEliminated; break; } - Color = UsedColors.find_next(Color); + Color = UsedColors[StackID].find_next(Color); } } @@ -274,12 +294,14 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) { // Assign it to the first available color (assumed to be the best) if it's // not possible to share a used color with other objects. if (!Share) { - assert(NextColor != -1 && "No more spill slots?"); - Color = NextColor; - UsedColors.set(Color); - NextColor = AllColors.find_next(NextColor); + assert(NextColors[StackID] != -1 && "No more spill slots?"); + Color = NextColors[StackID]; + UsedColors[StackID].set(Color); + NextColors[StackID] = AllColors[StackID].find_next(NextColors[StackID]); } + assert(MFI->getStackID(Color) == MFI->getStackID(FI)); + // Record the assignment. Assignments[Color].push_back(li); LLVM_DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n"); @@ -357,11 +379,13 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { } // Delete unused stack slots. - while (NextColor != -1) { - LLVM_DEBUG(dbgs() << "Removing unused stack object fi#" << NextColor - << "\n"); - MFI->RemoveStackObject(NextColor); - NextColor = AllColors.find_next(NextColor); + for (int StackID = 0, E = AllColors.size(); StackID != E; ++StackID) { + int NextColor = NextColors[StackID]; + while (NextColor != -1) { + LLVM_DEBUG(dbgs() << "Removing unused stack object fi#" << NextColor << "\n"); + MFI->RemoveStackObject(NextColor); + NextColor = AllColors[StackID].find_next(NextColor); + } } return true; @@ -383,6 +407,8 @@ void StackSlotColoring::RewriteInstruction(MachineInstr &MI, int NewFI = SlotMapping[OldFI]; if (NewFI == -1 || NewFI == OldFI) continue; + + assert(MFI->getStackID(OldFI) == MFI->getStackID(NewFI)); MO.setIndex(NewFI); } @@ -487,7 +513,9 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { InitializeSlots(); Changed = ColorSlots(MF); - NextColor = -1; + for (int &Next : NextColors) + Next = -1; + SSIntervals.clear(); for (unsigned i = 0, e = SSRefs.size(); i != e; ++i) SSRefs[i].clear(); diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir new file mode 100644 index 000000000000..be8b207a7408 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-wrong-stack-id.mir @@ -0,0 +1,97 @@ +# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -stress-regalloc=3 -start-before=greedy -stop-after=stack-slot-coloring -o - %s | FileCheck -check-prefixes=SHARE,GCN %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -stress-regalloc=3 -start-before=greedy -stop-after=stack-slot-coloring -no-stack-slot-sharing -o - %s | FileCheck -check-prefixes=NOSHARE,GCN %s + +# Make sure that stack slot coloring doesn't try to merge frame +# indexes used for SGPR spilling with those that aren't. +# Even when stack slot sharing was disabled, it was still moving the +# FI ID used for an SGPR spill to a normal frame index. + +--- | + + define void @sgpr_spill_wrong_stack_id(float addrspace(1)* nocapture readnone %arg, float addrspace(1)* noalias %arg1) { + bb: + %tmp = load i32, i32 addrspace(1)* null, align 4 + call void @func(i32 undef) + call void @func(i32 %tmp) + unreachable + } + + declare void @func(i32) + +... +--- + +# GCN-LABEL: name: sgpr_spill_wrong_stack_id +# SHARE: stack: +# SHARE: - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4, +# SHARE: stack-id: 0, callee-saved-register: '', callee-saved-restored: true, +# SHARE: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +# SHARE: - { id: 1, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4, +# SHARE: stack-id: 1, callee-saved-register: '', callee-saved-restored: true, +# SHARE: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +# SHARE: - { id: 2, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4, +# SHARE: stack-id: 1, callee-saved-register: '', callee-saved-restored: true, +# SHARE: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + +# SHARE: SI_SPILL_S32_SAVE $sgpr5, %stack.2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (store 4 into %stack.2, addrspace 5) +# SHARE: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) +# SHARE: SI_SPILL_S64_SAVE killed renamable $sgpr6_sgpr7, %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (store 8 into %stack.1, align 4, addrspace 5) +# SHARE: renamable $sgpr6_sgpr7 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (load 8 from %stack.1, align 4, addrspace 5) +# SHARE: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @func, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit undef $vgpr0 +# SHARE: $sgpr5 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (load 4 from %stack.2, addrspace 5) +# SHARE: $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) +# SHARE: renamable $sgpr6_sgpr7 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (load 8 from %stack.1, align 4, addrspace 5) +# SHARE: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @func, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0 +# SHARE: $sgpr5 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (load 4 from %stack.2, addrspace 5) + +# NOSHARE: stack: +# NOSHARE: - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4, +# NOSHARE: stack-id: 0, callee-saved-register: '', callee-saved-restored: true, +# NOSHARE: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +# NOSHARE: - { id: 1, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4, +# NOSHARE: stack-id: 1, callee-saved-register: '', callee-saved-restored: true, +# NOSHARE: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +# NOSHARE: - { id: 2, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4, +# NOSHARE: stack-id: 1, callee-saved-register: '', callee-saved-restored: true, +# NOSHARE: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +# NOSHARE: - { id: 3, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4, +# NOSHARE: stack-id: 1, callee-saved-register: '', callee-saved-restored: true, +# NOSHARE: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + +# NOSHARE: SI_SPILL_S32_SAVE $sgpr5, %stack.2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (store 4 into %stack.2, addrspace 5) +# NOSHARE: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) +# NOSHARE: SI_SPILL_S64_SAVE killed renamable $sgpr6_sgpr7, %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (store 8 into %stack.1, align 4, addrspace 5) +# NOSHARE: renamable $sgpr6_sgpr7 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (load 8 from %stack.1, align 4, addrspace 5) +# NOSHARE: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @func, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit undef $vgpr0 +# NOSHARE: $sgpr5 = SI_SPILL_S32_RESTORE %stack.2, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (load 4 from %stack.2, addrspace 5) +# NOSHARE: SI_SPILL_S32_SAVE $sgpr5, %stack.3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (store 4 into %stack.3, addrspace 5) +# NOSHARE: $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr5, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) +# NOSHARE: renamable $sgpr6_sgpr7 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (load 8 from %stack.1, align 4, addrspace 5) +# NOSHARE: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr6_sgpr7, @func, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit $vgpr0 +# NOSHARE: $sgpr5 = SI_SPILL_S32_RESTORE %stack.3, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr5 :: (load 4 from %stack.3, addrspace 5) + +... + +name: sgpr_spill_wrong_stack_id +tracksRegLiveness: true +frameInfo: + adjustsStack: false + hasCalls: true +body: | + bb.0.bb: + %8:sreg_32_xm0 = COPY $sgpr5 + %4:vreg_64 = IMPLICIT_DEF + %3:vgpr_32 = FLAT_LOAD_DWORD %4, 0, 0, 0, implicit $exec, implicit $flat_scr + %5:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @func + 4, target-flags(amdgpu-rel32-hi) @func + 4, implicit-def dead $scc + ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr5 + dead $sgpr30_sgpr31 = SI_CALL %5, @func, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit undef $vgpr0 + $sgpr5 = COPY %8 + %12:sreg_32_xm0 = COPY $sgpr5 + ADJCALLSTACKDOWN 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr5 + ADJCALLSTACKUP 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr5 + $vgpr0 = COPY %3 + dead $sgpr30_sgpr31 = SI_CALL %5, @func, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit killed $vgpr0 + $sgpr5 = COPY %12 + ADJCALLSTACKDOWN 0, 0, implicit-def $sgpr32, implicit $sgpr32, implicit $sgpr5 + +...