diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index 96cfce5b84df..1377a6dd6aa7 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -409,9 +409,6 @@ namespace llvm { /// This pass frees the memory occupied by the MachineFunction. FunctionPass *createFreeMachineFunctionPass(); - /// This pass combine basic blocks guarded by the same branch. - extern char &BranchCoalescingID; - /// This pass performs outlining on machine instructions directly before /// printing assembly. ModulePass *createMachineOutlinerPass(); diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 39ac4649b70d..94f0bfa45524 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -76,7 +76,6 @@ void initializeBasicAAWrapperPassPass(PassRegistry&); void initializeBlockExtractorPassPass(PassRegistry&); void initializeBlockFrequencyInfoWrapperPassPass(PassRegistry&); void initializeBoundsCheckingPass(PassRegistry&); -void initializeBranchCoalescingPass(PassRegistry&); void initializeBranchFolderPassPass(PassRegistry&); void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry&); void initializeBranchRelaxationPass(PassRegistry&); diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 7f3c6da91268..7ec7fda4e445 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -4,7 +4,6 @@ add_llvm_library(LLVMCodeGen Analysis.cpp AtomicExpandPass.cpp BasicTargetTransformInfo.cpp - BranchCoalescing.cpp BranchFolding.cpp BranchRelaxation.cpp BuiltinGCs.cpp diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index b7fd45a3f6a6..5acd6d52ea82 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -21,7 +21,6 @@ using namespace llvm; /// initializeCodeGen - Initialize all passes linked into the CodeGen library. 
void llvm::initializeCodeGen(PassRegistry &Registry) { initializeAtomicExpandPass(Registry); - initializeBranchCoalescingPass(Registry); initializeBranchFolderPassPass(Registry); initializeBranchRelaxationPass(Registry); initializeCodeGenPreparePass(Registry); diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 481baea2dff0..34d96a811300 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -919,9 +919,6 @@ void TargetPassConfig::addMachineSSAOptimization() { addPass(&MachineLICMID, false); addPass(&MachineCSEID, false); - // Coalesce basic blocks with the same branch condition - addPass(&BranchCoalescingID); - addPass(&MachineSinkingID); addPass(&PeepholeOptimizerID); diff --git a/llvm/lib/Target/PowerPC/CMakeLists.txt b/llvm/lib/Target/PowerPC/CMakeLists.txt index 7ca4c1999003..4aa6dfab5257 100644 --- a/llvm/lib/Target/PowerPC/CMakeLists.txt +++ b/llvm/lib/Target/PowerPC/CMakeLists.txt @@ -16,6 +16,7 @@ add_llvm_target(PowerPCCodeGen PPCBoolRetToInt.cpp PPCAsmPrinter.cpp PPCBranchSelector.cpp + PPCBranchCoalescing.cpp PPCCCState.cpp PPCCTRLoops.cpp PPCHazardRecognizers.cpp diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h index ad92ac8ce120..40790011f9ba 100644 --- a/llvm/lib/Target/PowerPC/PPC.h +++ b/llvm/lib/Target/PowerPC/PPC.h @@ -41,6 +41,7 @@ namespace llvm { FunctionPass *createPPCVSXSwapRemovalPass(); FunctionPass *createPPCMIPeepholePass(); FunctionPass *createPPCBranchSelectionPass(); + FunctionPass *createPPCBranchCoalescingPass(); FunctionPass *createPPCQPXLoadSplatPass(); FunctionPass *createPPCISelDag(PPCTargetMachine &TM, CodeGenOpt::Level OL); FunctionPass *createPPCTLSDynamicCallPass(); diff --git a/llvm/lib/CodeGen/BranchCoalescing.cpp b/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp similarity index 90% rename from llvm/lib/CodeGen/BranchCoalescing.cpp rename to llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp index 
2c41b597843c..467cb261b9fe 100644 --- a/llvm/lib/CodeGen/BranchCoalescing.cpp +++ b/llvm/lib/Target/PowerPC/PPCBranchCoalescing.cpp @@ -13,6 +13,7 @@ /// //===----------------------------------------------------------------------===// +#include "PPC.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineDominators.h" @@ -27,18 +28,18 @@ using namespace llvm; -#define DEBUG_TYPE "branch-coalescing" - -static cl::opt - EnableBranchCoalescing("enable-branch-coalesce", cl::Hidden, - cl::desc("enable coalescing of duplicate branches")); +#define DEBUG_TYPE "ppc-branch-coalescing" STATISTIC(NumBlocksCoalesced, "Number of blocks coalesced"); STATISTIC(NumPHINotMoved, "Number of PHI Nodes that cannot be merged"); STATISTIC(NumBlocksNotCoalesced, "Number of blocks not coalesced"); +namespace llvm { + void initializePPCBranchCoalescingPass(PassRegistry&); +} + //===----------------------------------------------------------------------===// -// BranchCoalescing +// PPCBranchCoalescing //===----------------------------------------------------------------------===// /// /// Improve scheduling by coalescing branches that depend on the same condition. @@ -46,13 +47,17 @@ STATISTIC(NumBlocksNotCoalesced, "Number of blocks not coalesced"); /// and attempts to merge the blocks together. Such opportunities arise from /// the expansion of select statements in the IR. /// -/// For example, consider the following LLVM IR: +/// This pass does not handle implicit operands on branch statements. In order +/// to run on targets that use implicit operands, changes need to be made in the +/// canCoalesceBranch and canMerge methods. 
/// -/// %test = icmp eq i32 %x 0 -/// %tmp1 = select i1 %test, double %a, double 2.000000e-03 -/// %tmp2 = select i1 %test, double %b, double 5.000000e-03 +/// Example: the following LLVM IR /// -/// This IR expands to the following machine code on PowerPC: +/// %test = icmp eq i32 %x 0 +/// %tmp1 = select i1 %test, double %a, double 2.000000e-03 +/// %tmp2 = select i1 %test, double %b, double 5.000000e-03 +/// +/// expands to the following machine code: /// /// BB#0: derived from LLVM BB %entry /// Live Ins: %F1 %F3 %X6 @@ -132,7 +137,7 @@ STATISTIC(NumBlocksNotCoalesced, "Number of blocks not coalesced"); namespace { -class BranchCoalescing : public MachineFunctionPass { +class PPCBranchCoalescing : public MachineFunctionPass { struct CoalescingCandidateInfo { MachineBasicBlock *BranchBlock; // Block containing the branch MachineBasicBlock *BranchTargetBlock; // Block branched to @@ -157,15 +162,11 @@ class BranchCoalescing : public MachineFunctionPass { bool validateCandidates(CoalescingCandidateInfo &SourceRegion, CoalescingCandidateInfo &TargetRegion) const; - static bool isBranchCoalescingEnabled() { - return EnableBranchCoalescing == cl::BOU_TRUE; - } - public: static char ID; - BranchCoalescing() : MachineFunctionPass(ID) { - initializeBranchCoalescingPass(*PassRegistry::getPassRegistry()); + PPCBranchCoalescing() : MachineFunctionPass(ID) { + initializePPCBranchCoalescingPass(*PassRegistry::getPassRegistry()); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -190,21 +191,25 @@ public: }; } // End anonymous namespace. 
-char BranchCoalescing::ID = 0; -char &llvm::BranchCoalescingID = BranchCoalescing::ID; +char PPCBranchCoalescing::ID = 0; +/// createPPCBranchCoalescingPass - returns an instance of the Branch Coalescing +/// Pass +FunctionPass *llvm::createPPCBranchCoalescingPass() { + return new PPCBranchCoalescing(); +} -INITIALIZE_PASS_BEGIN(BranchCoalescing, DEBUG_TYPE, +INITIALIZE_PASS_BEGIN(PPCBranchCoalescing, DEBUG_TYPE, "Branch Coalescing", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) -INITIALIZE_PASS_END(BranchCoalescing, DEBUG_TYPE, "Branch Coalescing", +INITIALIZE_PASS_END(PPCBranchCoalescing, DEBUG_TYPE, "Branch Coalescing", false, false) -BranchCoalescing::CoalescingCandidateInfo::CoalescingCandidateInfo() +PPCBranchCoalescing::CoalescingCandidateInfo::CoalescingCandidateInfo() : BranchBlock(nullptr), BranchTargetBlock(nullptr), FallThroughBlock(nullptr), MustMoveDown(false), MustMoveUp(false) {} -void BranchCoalescing::CoalescingCandidateInfo::clear() { +void PPCBranchCoalescing::CoalescingCandidateInfo::clear() { BranchBlock = nullptr; BranchTargetBlock = nullptr; FallThroughBlock = nullptr; @@ -213,7 +218,7 @@ void BranchCoalescing::CoalescingCandidateInfo::clear() { MustMoveUp = false; } -void BranchCoalescing::initialize(MachineFunction &MF) { +void PPCBranchCoalescing::initialize(MachineFunction &MF) { MDT = &getAnalysis(); MPDT = &getAnalysis(); TII = MF.getSubtarget().getInstrInfo(); @@ -230,7 +235,7 @@ void BranchCoalescing::initialize(MachineFunction &MF) { ///\param[in,out] Cand The coalescing candidate to analyze ///\return true if and only if the branch can be coalesced, false otherwise /// -bool BranchCoalescing::canCoalesceBranch(CoalescingCandidateInfo &Cand) { +bool PPCBranchCoalescing::canCoalesceBranch(CoalescingCandidateInfo &Cand) { DEBUG(dbgs() << "Determine if branch block " << Cand.BranchBlock->getNumber() << " can be coalesced:"); MachineBasicBlock *FalseMBB = nullptr; @@ 
-246,6 +251,19 @@ bool BranchCoalescing::canCoalesceBranch(CoalescingCandidateInfo &Cand) { if (!I.isBranch()) continue; + // The analyzeBranch method does not include any implicit operands. + // This is not an issue on PPC but must be handled on other targets. + // For this pass to be made target-independent, the analyzeBranch API + // needs to be updated to support implicit operands and there would + // need to be a way to verify that any implicit operands would not be + // clobbered by merging blocks. This would include identifying the + // implicit operands as well as the basic block they are defined in. + // This could be done by changing the analyzeBranch API to have it also + // record and return the implicit operands and the blocks where they are + // defined. Alternatively, the BranchCoalescing code would need to be + // extended to identify the implicit operands. The analysis in canMerge + // must then be extended to prove that none of the implicit operands are + // changed in the blocks that are combined during coalescing. 
if (I.getNumOperands() != I.getNumExplicitOperands()) { DEBUG(dbgs() << "Terminator contains implicit operands - skip : " << I << "\n"); @@ -309,7 +327,7 @@ bool BranchCoalescing::canCoalesceBranch(CoalescingCandidateInfo &Cand) { /// \param[in] OpList2 operand list /// \return true if and only if the operands lists are identical /// -bool BranchCoalescing::identicalOperands( +bool PPCBranchCoalescing::identicalOperands( ArrayRef OpList1, ArrayRef OpList2) const { if (OpList1.size() != OpList2.size()) { @@ -361,7 +379,7 @@ bool BranchCoalescing::identicalOperands( /// \param[in] SourceMBB block to move PHI instructions from /// \param[in] TargetMBB block to move PHI instructions to /// -void BranchCoalescing::moveAndUpdatePHIs(MachineBasicBlock *SourceMBB, +void PPCBranchCoalescing::moveAndUpdatePHIs(MachineBasicBlock *SourceMBB, MachineBasicBlock *TargetMBB) { MachineBasicBlock::iterator MI = SourceMBB->begin(); @@ -394,7 +412,7 @@ void BranchCoalescing::moveAndUpdatePHIs(MachineBasicBlock *SourceMBB, /// \return true if it is safe to move MI to beginning of TargetMBB, /// false otherwise. /// -bool BranchCoalescing::canMoveToBeginning(const MachineInstr &MI, +bool PPCBranchCoalescing::canMoveToBeginning(const MachineInstr &MI, const MachineBasicBlock &TargetMBB ) const { @@ -425,7 +443,7 @@ bool BranchCoalescing::canMoveToBeginning(const MachineInstr &MI, /// \return true if it is safe to move MI to end of TargetMBB, /// false otherwise. /// -bool BranchCoalescing::canMoveToEnd(const MachineInstr &MI, +bool PPCBranchCoalescing::canMoveToEnd(const MachineInstr &MI, const MachineBasicBlock &TargetMBB ) const { @@ -457,7 +475,7 @@ bool BranchCoalescing::canMoveToEnd(const MachineInstr &MI, /// \return true if all instructions in SourceRegion.BranchBlock can be merged /// into a block in TargetRegion; false otherwise. 
/// -bool BranchCoalescing::validateCandidates( +bool PPCBranchCoalescing::validateCandidates( CoalescingCandidateInfo &SourceRegion, CoalescingCandidateInfo &TargetRegion) const { @@ -500,7 +518,7 @@ bool BranchCoalescing::validateCandidates( /// \return true if all instructions in SourceRegion.BranchBlock can be merged /// into a block in TargetRegion, false otherwise. /// -bool BranchCoalescing::canMerge(CoalescingCandidateInfo &SourceRegion, +bool PPCBranchCoalescing::canMerge(CoalescingCandidateInfo &SourceRegion, CoalescingCandidateInfo &TargetRegion) const { if (!validateCandidates(SourceRegion, TargetRegion)) return false; @@ -605,7 +623,7 @@ bool BranchCoalescing::canMerge(CoalescingCandidateInfo &SourceRegion, /// \param[in] SourceRegion The candidate to move blocks from /// \param[in] TargetRegion The candidate to move blocks to /// -bool BranchCoalescing::mergeCandidates(CoalescingCandidateInfo &SourceRegion, +bool PPCBranchCoalescing::mergeCandidates(CoalescingCandidateInfo &SourceRegion, CoalescingCandidateInfo &TargetRegion) { if (SourceRegion.MustMoveUp && SourceRegion.MustMoveDown) { @@ -685,10 +703,9 @@ bool BranchCoalescing::mergeCandidates(CoalescingCandidateInfo &SourceRegion, return true; } -bool BranchCoalescing::runOnMachineFunction(MachineFunction &MF) { +bool PPCBranchCoalescing::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction()) || MF.empty() || - !isBranchCoalescingEnabled()) + if (skipFunction(*MF.getFunction()) || MF.empty()) return false; bool didSomething = false; diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp index bc5d32b37fda..18426bdb6bf1 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -40,6 +40,10 @@ using namespace llvm; + +static cl::opt + DisableBranchCoalescing("disable-ppc-branch-coalesce", cl::Hidden, + cl::desc("disable coalescing of duplicate branches for PPC")); static 
cl:: opt DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden, cl::desc("Disable CTR loops for PPC")); @@ -378,6 +382,10 @@ bool PPCPassConfig::addInstSelector() { } void PPCPassConfig::addMachineSSAOptimization() { + // PPCBranchCoalescingPass needs to be done before machine sinking + // since it merges empty blocks. + if (!DisableBranchCoalescing && getOptLevel() != CodeGenOpt::None) + addPass(createPPCBranchCoalescingPass()); TargetPassConfig::addMachineSSAOptimization(); // For little endian, remove where possible the vector swap instructions // introduced at code generation to normalize vector element order. diff --git a/llvm/test/CodeGen/PowerPC/branch_coalesce.ll b/llvm/test/CodeGen/PowerPC/branch_coalesce.ll index deb6d898c2e0..6ec9eed69353 100644 --- a/llvm/test/CodeGen/PowerPC/branch_coalesce.ll +++ b/llvm/test/CodeGen/PowerPC/branch_coalesce.ll @@ -1,9 +1,54 @@ -; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs -enable-branch-coalesce=true < %s | FileCheck %s -; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu -verify-machineinstrs -enable-branch-coalesce=true < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs -disable-ppc-branch-coalesce < %s | FileCheck --check-prefix=CHECK-NOCOALESCE %s +; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu -verify-machineinstrs -disable-ppc-branch-coalesce < %s | FileCheck --check-prefix=CHECK-NOCOALESCE %s ; Function Attrs: nounwind define double @testBranchCoal(double %a, double %b, double %c, i32 %x) { -entry: + +; CHECK-LABEL: @testBranchCoal +; CHECK: cmplwi [[CMPR:[0-7]+]], 6, 0 +; CHECK: beq [[CMPR]], .LBB[[LAB1:[0-9_]+]] +; CHECK-DAG: addis [[LD1REG:[0-9]+]], 2, .LCPI0_0@toc@ha +; CHECK-DAG: addis [[LD2REG:[0-9]+]], 2, .LCPI0_1@toc@ha 
+; CHECK-DAG: xxlxor 2, 2, 2 +; CHECK-NOT: beq +; CHECK-DAG: addi [[LD1BASE:[0-9]+]], [[LD1REG]] +; CHECK-DAG: addi [[LD2BASE:[0-9]+]], [[LD2REG]] +; CHECK-DAG: lxsdx 1, 0, [[LD1BASE]] +; CHECK-DAG: lxsdx 3, 0, [[LD2BASE]] +; CHECK: .LBB[[LAB1]] +; CHECK: xsadddp 0, 1, 2 +; CHECK: xsadddp 1, 0, 3 +; CHECK: blr + +; CHECK-NOCOALESCE-LABEL: testBranchCoal: +; CHECK-NOCOALESCE: # BB#0: # %entry +; CHECK-NOCOALESCE-NEXT: cmplwi 0, 6, 0 +; CHECK-NOCOALESCE-NEXT: bne 0, .LBB0_5 +; CHECK-NOCOALESCE-NEXT: # BB#1: # %entry +; CHECK-NOCOALESCE-NEXT: bne 0, .LBB0_6 +; CHECK-NOCOALESCE-NEXT: .LBB0_2: # %entry +; CHECK-NOCOALESCE-NEXT: beq 0, .LBB0_4 +; CHECK-NOCOALESCE-NEXT: .LBB0_3: # %entry +; CHECK-NOCOALESCE-NEXT: addis 3, 2, .LCPI0_1@toc@ha +; CHECK-NOCOALESCE-NEXT: addi 3, 3, .LCPI0_1@toc@l +; CHECK-NOCOALESCE-NEXT: lxsdx 3, 0, 3 +; CHECK-NOCOALESCE-NEXT: .LBB0_4: # %entry +; CHECK-NOCOALESCE-NEXT: xsadddp 0, 1, 2 +; CHECK-NOCOALESCE-NEXT: xsadddp 1, 0, 3 +; CHECK-NOCOALESCE-NEXT: blr +; CHECK-NOCOALESCE-NEXT: .LBB0_5: # %entry +; CHECK-NOCOALESCE-NEXT: addis 3, 2, .LCPI0_0@toc@ha +; CHECK-NOCOALESCE-NEXT: addi 3, 3, .LCPI0_0@toc@l +; CHECK-NOCOALESCE-NEXT: lxsdx 1, 0, 3 +; CHECK-NOCOALESCE-NEXT: beq 0, .LBB0_2 +; CHECK-NOCOALESCE-NEXT: .LBB0_6: # %entry +; CHECK-NOCOALESCE-NEXT: xxlxor 2, 2, 2 +; CHECK-NOCOALESCE-NEXT: bne 0, .LBB0_3 +; CHECK-NOCOALESCE-NEXT: b .LBB0_4 + entry: + %test = icmp eq i32 %x, 0 %tmp1 = select i1 %test, double %a, double 2.000000e-03 %tmp2 = select i1 %test, double %b, double 0.000000e+00 @@ -12,20 +57,4 @@ entry: %res1 = fadd double %tmp1, %tmp2 %result = fadd double %res1, %tmp3 ret double %result - -; CHECK-LABEL: @testBranchCoal -; CHECK: cmplwi [[CMPR:[0-7]+]], 6, 0 -; CHECK: beq [[CMPR]], .LBB[[LAB1:[0-9_]+]] -; CHECK-DAG: addis [[LD1REG:[0-9]+]], 2, .LCPI0_0@toc@ha -; CHECK-DAG: addis [[LD2REG:[0-9]+]], 2, .LCPI0_1@toc@ha -; CHECK-DAG: xxlxor 2, 2, 2 -; CHECK-NOT: beq -; CHECK-DAG: addi [[LD1BASE:[0-9]+]], [[LD1REG]] -; CHECK-DAG: addi 
[[LD2BASE:[0-9]+]], [[LD2REG]] -; CHECK-DAG: lxsdx 1, 0, [[LD1BASE]] -; CHECK-DAG: lxsdx 3, 0, [[LD2BASE]] -; CHECK: .LBB[[LAB1]] -; CHECK: xsadddp 0, 1, 2 -; CHECK: xsadddp 1, 0, 3 -; CHECK: blr } diff --git a/llvm/test/CodeGen/PowerPC/select-i1-vs-i1.ll b/llvm/test/CodeGen/PowerPC/select-i1-vs-i1.ll index b7beb8165fdf..9026b48edb8f 100644 --- a/llvm/test/CodeGen/PowerPC/select-i1-vs-i1.ll +++ b/llvm/test/CodeGen/PowerPC/select-i1-vs-i1.ll @@ -1026,10 +1026,6 @@ entry: %cond = select i1 %cmp3, ppc_fp128 %a1, ppc_fp128 %a2 ret ppc_fp128 %cond -; FIXME: Because of the way that the late SELECT_* pseudo-instruction expansion -; works, we end up with two blocks with the same predicate. These could be -; combined. - ; CHECK-LABEL: @testppc_fp128eq ; CHECK-DAG: fcmpu {{[0-9]+}}, 6, 8 ; CHECK-DAG: fcmpu {{[0-9]+}}, 5, 7 @@ -1040,10 +1036,8 @@ entry: ; CHECK: crxor [[REG3:[0-9]+]], [[REG2]], [[REG1]] ; CHECK: bc 12, [[REG3]], .LBB[[BB1:[0-9_]+]] ; CHECK: fmr 11, 9 -; CHECK: .LBB[[BB1]]: -; CHECK: bc 12, [[REG3]], .LBB[[BB2:[0-9_]+]] ; CHECK: fmr 12, 10 -; CHECK: .LBB[[BB2]]: +; CHECK: .LBB[[BB1]]: ; CHECK-DAG: fmr 1, 11 ; CHECK-DAG: fmr 2, 12 ; CHECK: blr