[SystemZ, RegAlloc] Favor 3-address instructions during instruction selection.

This patch aims to reduce spilling and register moves by using the 3-address
versions of instructions by default instead of the equivalent 2-address
ones. In general, both spilling and the number of register moves seem to
improve noticeably.

Regalloc hints are passed to increase conversions to 2-address instructions
which are done in SystemZShortenInst.cpp (after regalloc).

Since the SystemZ reg/mem instructions are 2-address (dst and lhs regs are
the same), foldMemoryOperandImpl() can no longer trivially fold a spilled
source register since the reg/reg instruction is now 3-address. In order to
remedy this, new 3-address pseudo memory instructions are used to perform the
folding only when the dst and lhs virtual registers are known to be allocated
to the same physreg. To prevent MachineCopyPropagation from changing the
registers of these transformed instructions (which would make them 3-address
again), a new target pass, SystemZPostRewrite.cpp, is run just after
VirtRegRewriter and immediately lowers each pseudo to a target instruction.

If it had been possible to insert a COPY instruction and change a
register operand (convert to 2-address) in foldMemoryOperandImpl() while
trusting that the caller (e.g. InlineSpiller) would update/repair the
involved LiveIntervals, the solution involving pseudo instructions would not
have been needed. This is perhaps a potential improvement (see Phabricator
post).

Common code changes:

* A new hook TargetPassConfig::addPostRewrite() is utilized to be able to run a
target pass immediately before MachineCopyPropagation.

* VirtRegMap is passed as an argument to foldMemoryOperand().

Review: Ulrich Weigand, Quentin Colombet
https://reviews.llvm.org/D60888

llvm-svn: 362868
This commit is contained in:
Jonas Paulsson 2019-06-08 06:19:15 +00:00
parent 27de3d3950
commit fdc4ea34e3
26 changed files with 515 additions and 217 deletions

View File

@ -26,6 +26,7 @@
#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineOutliner.h" #include "llvm/CodeGen/MachineOutliner.h"
#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCInstrInfo.h"
#include "llvm/Support/BranchProbability.h" #include "llvm/Support/BranchProbability.h"
#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ErrorHandling.h"
@ -932,9 +933,12 @@ public:
/// operand folded, otherwise NULL is returned. /// operand folded, otherwise NULL is returned.
/// The new instruction is inserted before MI, and the client is responsible /// The new instruction is inserted before MI, and the client is responsible
/// for removing the old instruction. /// for removing the old instruction.
/// If VRM is passed, the assigned physregs can be inspected by target to
/// decide on using an opcode (note that those assignments can still change).
MachineInstr *foldMemoryOperand(MachineInstr &MI, ArrayRef<unsigned> Ops, MachineInstr *foldMemoryOperand(MachineInstr &MI, ArrayRef<unsigned> Ops,
int FI, int FI,
LiveIntervals *LIS = nullptr) const; LiveIntervals *LIS = nullptr,
VirtRegMap *VRM = nullptr) const;
/// Same as the previous version except it allows folding of any load and /// Same as the previous version except it allows folding of any load and
/// store from / to any address, not just from a specific stack slot. /// store from / to any address, not just from a specific stack slot.
@ -1024,7 +1028,8 @@ protected:
foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
ArrayRef<unsigned> Ops, ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, int FrameIndex, MachineBasicBlock::iterator InsertPt, int FrameIndex,
LiveIntervals *LIS = nullptr) const { LiveIntervals *LIS = nullptr,
VirtRegMap *VRM = nullptr) const {
return nullptr; return nullptr;
} }

View File

@ -386,6 +386,10 @@ protected:
return false; return false;
} }
/// Add passes to be run immediately after virtual registers are rewritten
/// to physical registers. The default implementation adds nothing; a target
/// overrides this hook to schedule its own passes at that point.
virtual void addPostRewrite() { }
/// This method may be implemented by targets that want to run passes after /// This method may be implemented by targets that want to run passes after
/// register allocation pass pipeline but before prolog-epilog insertion. /// register allocation pass pipeline but before prolog-epilog insertion.
virtual void addPostRegAlloc() { } virtual void addPostRegAlloc() { }

View File

@ -837,7 +837,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
MachineInstr *FoldMI = MachineInstr *FoldMI =
LoadMI ? TII.foldMemoryOperand(*MI, FoldOps, *LoadMI, &LIS) LoadMI ? TII.foldMemoryOperand(*MI, FoldOps, *LoadMI, &LIS)
: TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS); : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS, &VRM);
if (!FoldMI) if (!FoldMI)
return false; return false;

View File

@ -524,7 +524,8 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI, MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
ArrayRef<unsigned> Ops, int FI, ArrayRef<unsigned> Ops, int FI,
LiveIntervals *LIS) const { LiveIntervals *LIS,
VirtRegMap *VRM) const {
auto Flags = MachineMemOperand::MONone; auto Flags = MachineMemOperand::MONone;
for (unsigned OpIdx : Ops) for (unsigned OpIdx : Ops)
Flags |= MI.getOperand(OpIdx).isDef() ? MachineMemOperand::MOStore Flags |= MI.getOperand(OpIdx).isDef() ? MachineMemOperand::MOStore
@ -570,7 +571,7 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
MBB->insert(MI, NewMI); MBB->insert(MI, NewMI);
} else { } else {
// Ask the target to do the actual folding. // Ask the target to do the actual folding.
NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS); NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS, VRM);
} }
if (NewMI) { if (NewMI) {

View File

@ -1168,6 +1168,10 @@ void TargetPassConfig::addOptimizedRegAlloc() {
addPass(&MachineSchedulerID); addPass(&MachineSchedulerID);
if (addRegAssignmentOptimized()) { if (addRegAssignmentOptimized()) {
// Allow targets to expand pseudo instructions depending on the choice of
// registers before MachineCopyPropagation.
addPostRewrite();
// Copy propagate to forward register uses and try to eliminate COPYs that // Copy propagate to forward register uses and try to eliminate COPYs that
// were not coalesced. // were not coalesced.
addPass(&MachineCopyPropagationID); addPass(&MachineCopyPropagationID);

View File

@ -3049,7 +3049,7 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,
MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, int FrameIndex, MachineBasicBlock::iterator InsertPt, int FrameIndex,
LiveIntervals *LIS) const { LiveIntervals *LIS, VirtRegMap *VRM) const {
// This is a bit of a hack. Consider this instruction: // This is a bit of a hack. Consider this instruction:
// //
// %0 = COPY %sp; GPR64all:%0 // %0 = COPY %sp; GPR64all:%0

View File

@ -162,7 +162,8 @@ public:
foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
ArrayRef<unsigned> Ops, ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, int FrameIndex, MachineBasicBlock::iterator InsertPt, int FrameIndex,
LiveIntervals *LIS = nullptr) const override; LiveIntervals *LIS = nullptr,
VirtRegMap *VRM = nullptr) const override;
/// \returns true if a branch from an instruction with opcode \p BranchOpc /// \returns true if a branch from an instruction with opcode \p BranchOpc
/// bytes is capable of jumping to a position \p BrOffset bytes away. /// bytes is capable of jumping to a position \p BrOffset bytes away.

View File

@ -30,6 +30,7 @@ add_llvm_target(SystemZCodeGen
SystemZMCInstLower.cpp SystemZMCInstLower.cpp
SystemZRegisterInfo.cpp SystemZRegisterInfo.cpp
SystemZSelectionDAGInfo.cpp SystemZSelectionDAGInfo.cpp
SystemZPostRewrite.cpp
SystemZShortenInst.cpp SystemZShortenInst.cpp
SystemZSubtarget.cpp SystemZSubtarget.cpp
SystemZTargetMachine.cpp SystemZTargetMachine.cpp

View File

@ -194,6 +194,7 @@ FunctionPass *createSystemZExpandPseudoPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM); FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM); FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZPostRewritePass(SystemZTargetMachine &TM);
FunctionPass *createSystemZTDCPass(); FunctionPass *createSystemZTDCPass();
} // end namespace llvm } // end namespace llvm

View File

@ -37,6 +37,12 @@ class InstSystemZ<int size, dag outs, dag ins, string asmstr,
string OpKey = ""; string OpKey = "";
string OpType = "none"; string OpType = "none";
// MemKey identifies a target reg-mem opcode, while MemType can be either
// "pseudo" or "target". This is used to map a pseudo memory instruction to
// its corresponding target opcode. See comment at MemFoldPseudo.
string MemKey = "";
string MemType = "none";
// Many distinct-operands instructions have older 2-operand equivalents. // Many distinct-operands instructions have older 2-operand equivalents.
// NumOpsKey uniquely identifies one of these 2-operand and 3-operand pairs, // NumOpsKey uniquely identifies one of these 2-operand and 3-operand pairs,
// with NumOpsValue being "2" or "3" as appropriate. // with NumOpsValue being "2" or "3" as appropriate.
@ -120,7 +126,8 @@ def getDisp20Opcode : InstrMapping {
let ValueCols = [["20"]]; let ValueCols = [["20"]];
} }
// Return the memory form of a register instruction. // Return the memory form of a register instruction. Note that this may
// return a MemFoldPseudo instruction (see below).
def getMemOpcode : InstrMapping { def getMemOpcode : InstrMapping {
let FilterClass = "InstSystemZ"; let FilterClass = "InstSystemZ";
let RowFields = ["OpKey"]; let RowFields = ["OpKey"];
@ -129,13 +136,22 @@ def getMemOpcode : InstrMapping {
let ValueCols = [["mem"]]; let ValueCols = [["mem"]];
} }
// Return the 3-operand form of a 2-operand instruction. // Return the target memory instruction for a MemFoldPseudo.
def getThreeOperandOpcode : InstrMapping { def getTargetMemOpcode : InstrMapping {
let FilterClass = "InstSystemZ";
let RowFields = ["MemKey"];
let ColFields = ["MemType"];
let KeyCol = ["pseudo"];
let ValueCols = [["target"]];
}
// Return the 2-operand form of a 3-operand instruction.
def getTwoOperandOpcode : InstrMapping {
let FilterClass = "InstSystemZ"; let FilterClass = "InstSystemZ";
let RowFields = ["NumOpsKey"]; let RowFields = ["NumOpsKey"];
let ColFields = ["NumOpsValue"]; let ColFields = ["NumOpsValue"];
let KeyCol = ["2"]; let KeyCol = ["3"];
let ValueCols = [["3"]]; let ValueCols = [["2"]];
} }
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
@ -3066,6 +3082,8 @@ class BinaryRRFa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
mnemonic#"\t$R1, $R2, $R3", mnemonic#"\t$R1, $R2, $R3",
[(set cls1:$R1, (operator cls2:$R2, cls3:$R3))]> { [(set cls1:$R1, (operator cls2:$R2, cls3:$R3))]> {
let M4 = 0; let M4 = 0;
let OpKey = mnemonic#cls1;
let OpType = "reg";
} }
multiclass BinaryRRAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2, multiclass BinaryRRAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2,
@ -3073,9 +3091,9 @@ multiclass BinaryRRAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2,
RegisterOperand cls2> { RegisterOperand cls2> {
let NumOpsKey = mnemonic in { let NumOpsKey = mnemonic in {
let NumOpsValue = "3" in let NumOpsValue = "3" in
def K : BinaryRRFa<mnemonic#"k", opcode2, null_frag, cls1, cls1, cls2>, def K : BinaryRRFa<mnemonic#"k", opcode2, operator, cls1, cls1, cls2>,
Requires<[FeatureDistinctOps]>; Requires<[FeatureDistinctOps]>;
let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in let NumOpsValue = "2" in
def "" : BinaryRR<mnemonic, opcode1, operator, cls1, cls2>; def "" : BinaryRR<mnemonic, opcode1, operator, cls1, cls2>;
} }
} }
@ -3085,9 +3103,9 @@ multiclass BinaryRREAndK<string mnemonic, bits<16> opcode1, bits<16> opcode2,
RegisterOperand cls2> { RegisterOperand cls2> {
let NumOpsKey = mnemonic in { let NumOpsKey = mnemonic in {
let NumOpsValue = "3" in let NumOpsValue = "3" in
def K : BinaryRRFa<mnemonic#"k", opcode2, null_frag, cls1, cls1, cls2>, def K : BinaryRRFa<mnemonic#"k", opcode2, operator, cls1, cls1, cls2>,
Requires<[FeatureDistinctOps]>; Requires<[FeatureDistinctOps]>;
let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in let NumOpsValue = "2" in
def "" : BinaryRRE<mnemonic, opcode1, operator, cls1, cls2>; def "" : BinaryRRE<mnemonic, opcode1, operator, cls1, cls2>;
} }
} }
@ -3188,9 +3206,9 @@ multiclass BinaryRIAndK<string mnemonic, bits<12> opcode1, bits<16> opcode2,
Immediate imm> { Immediate imm> {
let NumOpsKey = mnemonic in { let NumOpsKey = mnemonic in {
let NumOpsValue = "3" in let NumOpsValue = "3" in
def K : BinaryRIE<mnemonic##"k", opcode2, null_frag, cls, imm>, def K : BinaryRIE<mnemonic##"k", opcode2, operator, cls, imm>,
Requires<[FeatureDistinctOps]>; Requires<[FeatureDistinctOps]>;
let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in let NumOpsValue = "2" in
def "" : BinaryRI<mnemonic, opcode1, operator, cls, imm>; def "" : BinaryRI<mnemonic, opcode1, operator, cls, imm>;
} }
} }
@ -3265,9 +3283,9 @@ multiclass BinaryRSAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2,
SDPatternOperator operator, RegisterOperand cls> { SDPatternOperator operator, RegisterOperand cls> {
let NumOpsKey = mnemonic in { let NumOpsKey = mnemonic in {
let NumOpsValue = "3" in let NumOpsValue = "3" in
def K : BinaryRSY<mnemonic##"k", opcode2, null_frag, cls>, def K : BinaryRSY<mnemonic##"k", opcode2, operator, cls>,
Requires<[FeatureDistinctOps]>; Requires<[FeatureDistinctOps]>;
let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in let NumOpsValue = "2" in
def "" : BinaryRS<mnemonic, opcode1, operator, cls>; def "" : BinaryRS<mnemonic, opcode1, operator, cls>;
} }
} }
@ -4593,14 +4611,31 @@ multiclass BinaryRIAndKPseudo<string key, SDPatternOperator operator,
RegisterOperand cls, Immediate imm> { RegisterOperand cls, Immediate imm> {
let NumOpsKey = key in { let NumOpsKey = key in {
let NumOpsValue = "3" in let NumOpsValue = "3" in
def K : BinaryRIEPseudo<null_frag, cls, imm>, def K : BinaryRIEPseudo<operator, cls, imm>,
Requires<[FeatureHighWord, FeatureDistinctOps]>; Requires<[FeatureHighWord, FeatureDistinctOps]>;
let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in let NumOpsValue = "2" in
def "" : BinaryRIPseudo<operator, cls, imm>, def "" : BinaryRIPseudo<operator, cls, imm>,
Requires<[FeatureHighWord]>; Requires<[FeatureHighWord]>;
} }
} }
// A pseudo that is used during register allocation when folding a memory
// operand. The 3-address register instruction with a spilled source cannot
// be converted directly to a target 2-address reg/mem instruction.
// Mapping: <INSN>R -> MemFoldPseudo -> <INSN>
//
// Operands: $R1 is the result, $R2 the register source and $XBD2 the memory
// source (addressing mode given by 'mode'). 'bytes' is recorded as the access
// size. The pseudo has an empty selection pattern, so it is never selected
// directly.
class MemFoldPseudo<string mnemonic, RegisterOperand cls, bits<5> bytes,
AddressingMode mode>
: Pseudo<(outs cls:$R1), (ins cls:$R2, mode:$XBD2), []> {
// Registers this pseudo as the "mem" entry for the key mnemonic#"rk"#cls.
// NOTE(review): presumably this is what lets getMemOpcode return the pseudo
// for the matching 3-address register instruction -- confirm against the
// OpKey of that instruction.
let OpKey = mnemonic#"rk"#cls;
let OpType = "mem";
// Registers this pseudo as the "pseudo" entry for the key mnemonic#cls; the
// real reg/mem instruction with the same MemKey is its "target".
let MemKey = mnemonic#cls;
let MemType = "pseudo";
let mayLoad = 1;
let AccessBytes = bytes;
let HasIndex = 1;
let hasNoSchedulingInfo = 1;
}
// Like CompareRI, but expanded after RA depending on the choice of register. // Like CompareRI, but expanded after RA depending on the choice of register.
class CompareRIPseudo<SDPatternOperator operator, RegisterOperand cls, class CompareRIPseudo<SDPatternOperator operator, RegisterOperand cls,
Immediate imm> Immediate imm>
@ -4775,58 +4810,6 @@ class AtomicLoadWBinaryReg<SDPatternOperator operator>
class AtomicLoadWBinaryImm<SDPatternOperator operator, Immediate imm> class AtomicLoadWBinaryImm<SDPatternOperator operator, Immediate imm>
: AtomicLoadWBinary<operator, (i32 imm:$src2), imm>; : AtomicLoadWBinary<operator, (i32 imm:$src2), imm>;
// Define an instruction that operates on two fixed-length blocks of memory,
// and associated pseudo instructions for operating on blocks of any size.
// The Sequence form uses a straight-line sequence of instructions and
// the Loop form uses a loop of length-256 instructions followed by
// another instruction to handle the excess.
// Both pseudos are marked as using a custom inserter, as having no
// scheduling information, and as defining CC.
multiclass MemorySS<string mnemonic, bits<8> opcode,
SDPatternOperator sequence, SDPatternOperator loop> {
def "" : SideEffectBinarySSa<mnemonic, opcode>;
let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Defs = [CC] in {
def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
imm64:$length),
[(sequence bdaddr12only:$dest, bdaddr12only:$src,
imm64:$length)]>;
def Loop : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
imm64:$length, GR64:$count256),
[(loop bdaddr12only:$dest, bdaddr12only:$src,
imm64:$length, GR64:$count256)]>;
}
}
// The same, but the Sequence and Loop pseudos set CC as the result of the
// comparison operator (the patterns are of the form "set CC, (operator ...)")
// instead of listing CC in Defs.
multiclass CompareMemorySS<string mnemonic, bits<8> opcode,
SDPatternOperator sequence, SDPatternOperator loop> {
def "" : SideEffectBinarySSa<mnemonic, opcode>;
let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
imm64:$length),
[(set CC, (sequence bdaddr12only:$dest, bdaddr12only:$src,
imm64:$length))]>;
def Loop : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
imm64:$length, GR64:$count256),
[(set CC, (loop bdaddr12only:$dest, bdaddr12only:$src,
imm64:$length, GR64:$count256))]>;
}
}
// Define an instruction that operates on two strings, both terminated
// by the character in R0. The instruction processes a CPU-determined
// number of bytes at a time and sets CC to 3 if the instruction needs
// to be repeated. Also define a pseudo instruction that represents
// the full loop (the main instruction plus the branch on CC==3).
// The real instruction is declared as using R0L; the Loop pseudo is marked
// as using a custom inserter and as having no scheduling information.
multiclass StringRRE<string mnemonic, bits<16> opcode,
SDPatternOperator operator> {
let Uses = [R0L] in
def "" : SideEffectBinaryMemMemRRE<mnemonic, opcode, GR64, GR64>;
let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in
def Loop : Pseudo<(outs GR64:$end),
(ins GR64:$start1, GR64:$start2, GR32:$char),
[(set GR64:$end, (operator GR64:$start1, GR64:$start2,
GR32:$char))]>;
}
// A pseudo instruction that is a direct alias of a real instruction. // A pseudo instruction that is a direct alias of a real instruction.
// These aliases are used in cases where a particular register operand is // These aliases are used in cases where a particular register operand is
// fixed or where the same instruction is used with different register sizes. // fixed or where the same instruction is used with different register sizes.
@ -4892,3 +4875,90 @@ class RotateSelectAliasRIEf<RegisterOperand cls1, RegisterOperand cls2>
imm32zx6:$I5), []> { imm32zx6:$I5), []> {
let Constraints = "$R1 = $R1src"; let Constraints = "$R1 = $R1src";
} }
//===----------------------------------------------------------------------===//
// Multiclasses that emit both real and pseudo instructions
//===----------------------------------------------------------------------===//
// Emit a real BinaryRXY reg/mem instruction together with a matching
// MemFoldPseudo. The real instruction is tagged as the "target" for
// MemKey mnemonic#cls, and the record named <NAME>_MemFoldPseudo is built
// from the same mnemonic, register class, access size and addressing mode.
multiclass BinaryRXYAndPseudo<string mnemonic, bits<16> opcode,
SDPatternOperator operator, RegisterOperand cls,
SDPatternOperator load, bits<5> bytes,
AddressingMode mode = bdxaddr20only> {
def "" : BinaryRXY<mnemonic, opcode, operator, cls, load, bytes, mode> {
let MemKey = mnemonic#cls;
let MemType = "target";
}
// The pseudo is explicitly marked as having a 20-bit offset.
let Has20BitOffset = 1 in
def _MemFoldPseudo : MemFoldPseudo<mnemonic, cls, bytes, mode>;
}
// Emit a pair of real reg/mem instructions -- the RX form with a 12-bit
// displacement and the RXY form (suffix "Y") with a 20-bit displacement --
// plus a matching MemFoldPseudo. Only the 12-bit RX form is tagged as the
// "target" for MemKey mnemonic#cls, and the pseudo uses the bdxaddr12pair
// addressing mode.
multiclass BinaryRXPairAndPseudo<string mnemonic, bits<8> rxOpcode,
bits<16> rxyOpcode, SDPatternOperator operator,
RegisterOperand cls,
SDPatternOperator load, bits<5> bytes> {
// Both real forms share one DispKey and differ only in DispSize.
let DispKey = mnemonic ## #cls in {
def "" : BinaryRX<mnemonic, rxOpcode, operator, cls, load, bytes,
bdxaddr12pair> {
let DispSize = "12";
let MemKey = mnemonic#cls;
let MemType = "target";
}
let DispSize = "20" in
def Y : BinaryRXY<mnemonic#"y", rxyOpcode, operator, cls, load,
bytes, bdxaddr20pair>;
}
def _MemFoldPseudo : MemFoldPseudo<mnemonic, cls, bytes, bdxaddr12pair>;
}
// Define an instruction that operates on two fixed-length blocks of memory,
// and associated pseudo instructions for operating on blocks of any size.
// The Sequence form uses a straight-line sequence of instructions and
// the Loop form uses a loop of length-256 instructions followed by
// another instruction to handle the excess.
// Both pseudos are marked as using a custom inserter, as having no
// scheduling information, and as defining CC.
multiclass MemorySS<string mnemonic, bits<8> opcode,
SDPatternOperator sequence, SDPatternOperator loop> {
def "" : SideEffectBinarySSa<mnemonic, opcode>;
let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Defs = [CC] in {
def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
imm64:$length),
[(sequence bdaddr12only:$dest, bdaddr12only:$src,
imm64:$length)]>;
def Loop : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
imm64:$length, GR64:$count256),
[(loop bdaddr12only:$dest, bdaddr12only:$src,
imm64:$length, GR64:$count256)]>;
}
}
// The same, but the Sequence and Loop pseudos set CC as the result of the
// comparison operator (the patterns are of the form "set CC, (operator ...)")
// instead of listing CC in Defs.
multiclass CompareMemorySS<string mnemonic, bits<8> opcode,
SDPatternOperator sequence, SDPatternOperator loop> {
def "" : SideEffectBinarySSa<mnemonic, opcode>;
let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
imm64:$length),
[(set CC, (sequence bdaddr12only:$dest, bdaddr12only:$src,
imm64:$length))]>;
def Loop : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
imm64:$length, GR64:$count256),
[(set CC, (loop bdaddr12only:$dest, bdaddr12only:$src,
imm64:$length, GR64:$count256))]>;
}
}
// Define an instruction that operates on two strings, both terminated
// by the character in R0. The instruction processes a CPU-determined
// number of bytes at a time and sets CC to 3 if the instruction needs
// to be repeated. Also define a pseudo instruction that represents
// the full loop (the main instruction plus the branch on CC==3).
// The real instruction is declared as using R0L; the Loop pseudo is marked
// as using a custom inserter and as having no scheduling information.
multiclass StringRRE<string mnemonic, bits<16> opcode,
SDPatternOperator operator> {
let Uses = [R0L] in
def "" : SideEffectBinaryMemMemRRE<mnemonic, opcode, GR64, GR64>;
let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in
def Loop : Pseudo<(outs GR64:$end),
(ins GR64:$start1, GR64:$start2, GR32:$char),
[(set GR64:$end, (operator GR64:$start1, GR64:$start2,
GR32:$char))]>;
}

View File

@ -957,73 +957,13 @@ static void transferDeadCC(MachineInstr *OldMI, MachineInstr *NewMI) {
} }
} }
// Common tail of convertToThreeAddress once the two-address instruction
// OldMI has been replaced by the three-address instruction NewMI.
//
// When LiveVariables is available, every register operand of OldMI from
// index 1 onwards that is flagged as a kill has its kill moved from OldMI to
// NewMI. The dead-CC state is then transferred and NewMI is returned.
static MachineInstr *finishConvertToThreeAddress(MachineInstr *OldMI,
                                                 MachineInstr *NewMI,
                                                 LiveVariables *LV) {
  if (LV != nullptr) {
    // Operand 0 is deliberately not inspected; the scan starts at index 1.
    for (unsigned Idx = 1, End = OldMI->getNumOperands(); Idx < End; ++Idx) {
      MachineOperand &MO = OldMI->getOperand(Idx);
      if (!MO.isReg() || !MO.isKill())
        continue;
      LV->replaceKillInstruction(MO.getReg(), *OldMI, *NewMI);
    }
  }

  transferDeadCC(OldMI, NewMI);
  return NewMI;
}
MachineInstr *SystemZInstrInfo::convertToThreeAddress( MachineInstr *SystemZInstrInfo::convertToThreeAddress(
MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const { MachineFunction::iterator &MFI, MachineInstr &MI, LiveVariables *LV) const {
MachineBasicBlock *MBB = MI.getParent(); MachineBasicBlock *MBB = MI.getParent();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
unsigned Opcode = MI.getOpcode();
unsigned NumOps = MI.getNumOperands();
// Try to convert something like SLL into SLLK, if supported.
// We prefer to keep the two-operand form where possible both
// because it tends to be shorter and because some instructions
// have memory forms that can be used during spilling.
if (STI.hasDistinctOps()) {
MachineOperand &Dest = MI.getOperand(0);
MachineOperand &Src = MI.getOperand(1);
unsigned DestReg = Dest.getReg();
unsigned SrcReg = Src.getReg();
// AHIMux is only really a three-operand instruction when both operands
// are low registers. Try to constrain both operands to be low if
// possible.
if (Opcode == SystemZ::AHIMux &&
TargetRegisterInfo::isVirtualRegister(DestReg) &&
TargetRegisterInfo::isVirtualRegister(SrcReg) &&
MRI.getRegClass(DestReg)->contains(SystemZ::R1L) &&
MRI.getRegClass(SrcReg)->contains(SystemZ::R1L)) {
MRI.constrainRegClass(DestReg, &SystemZ::GR32BitRegClass);
MRI.constrainRegClass(SrcReg, &SystemZ::GR32BitRegClass);
}
int ThreeOperandOpcode = SystemZ::getThreeOperandOpcode(Opcode);
if (ThreeOperandOpcode >= 0) {
// Create three address instruction without adding the implicit
// operands. Those will instead be copied over from the original
// instruction by the loop below.
MachineInstrBuilder MIB(
*MF, MF->CreateMachineInstr(get(ThreeOperandOpcode), MI.getDebugLoc(),
/*NoImplicit=*/true));
MIB.add(Dest);
// Keep the kill state, but drop the tied flag.
MIB.addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg());
// Keep the remaining operands as-is.
for (unsigned I = 2; I < NumOps; ++I)
MIB.add(MI.getOperand(I));
MBB->insert(MI, MIB);
return finishConvertToThreeAddress(&MI, MIB, LV);
}
}
// Try to convert an AND into an RISBG-type instruction. // Try to convert an AND into an RISBG-type instruction.
if (LogicOp And = interpretAndImmediate(Opcode)) { // TODO: It might be beneficial to select RISBG and shorten to AND instead.
if (LogicOp And = interpretAndImmediate(MI.getOpcode())) {
uint64_t Imm = MI.getOperand(2).getImm() << And.ImmLSB; uint64_t Imm = MI.getOperand(2).getImm() << And.ImmLSB;
// AND IMMEDIATE leaves the other bits of the register unchanged. // AND IMMEDIATE leaves the other bits of the register unchanged.
Imm |= allOnes(And.RegSize) & ~(allOnes(And.ImmSize) << And.ImmLSB); Imm |= allOnes(And.RegSize) & ~(allOnes(And.ImmSize) << And.ImmLSB);
@ -1051,7 +991,16 @@ MachineInstr *SystemZInstrInfo::convertToThreeAddress(
.addImm(Start) .addImm(Start)
.addImm(End + 128) .addImm(End + 128)
.addImm(0); .addImm(0);
return finishConvertToThreeAddress(&MI, MIB, LV); if (LV) {
unsigned NumOps = MI.getNumOperands();
for (unsigned I = 1; I < NumOps; ++I) {
MachineOperand &Op = MI.getOperand(I);
if (Op.isReg() && Op.isKill())
LV->replaceKillInstruction(Op.getReg(), MI, *MIB);
}
}
transferDeadCC(&MI, MIB);
return MIB;
} }
} }
return nullptr; return nullptr;
@ -1060,7 +1009,7 @@ MachineInstr *SystemZInstrInfo::convertToThreeAddress(
MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl( MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, int FrameIndex, MachineBasicBlock::iterator InsertPt, int FrameIndex,
LiveIntervals *LIS) const { LiveIntervals *LIS, VirtRegMap *VRM) const {
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const MachineFrameInfo &MFI = MF.getFrameInfo(); const MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Size = MFI.getObjectSize(FrameIndex); unsigned Size = MFI.getObjectSize(FrameIndex);
@ -1214,12 +1163,37 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
} }
} }
// If the spilled operand is the final one, try to change <INSN>R // If the spilled operand is the final one or the instruction is
// into <INSN>. // commutable, try to change <INSN>R into <INSN>.
unsigned NumOps = MI.getNumExplicitOperands();
int MemOpcode = SystemZ::getMemOpcode(Opcode); int MemOpcode = SystemZ::getMemOpcode(Opcode);
// See if this is a 3-address instruction that is convertible to 2-address
// and suitable for folding below. Only try this with virtual registers
// and a provided VRM (during regalloc).
bool NeedsCommute = false;
if (SystemZ::getTwoOperandOpcode(Opcode) != -1 && MemOpcode != -1) {
if (VRM == nullptr)
MemOpcode = -1;
else {
assert(NumOps == 3 && "Expected two source registers.");
unsigned DstReg = MI.getOperand(0).getReg();
unsigned DstPhys =
(TRI->isVirtualRegister(DstReg) ? VRM->getPhys(DstReg) : DstReg);
unsigned SrcReg = (OpNum == 2 ? MI.getOperand(1).getReg()
: ((OpNum == 1 && MI.isCommutable())
? MI.getOperand(2).getReg()
: 0));
if (DstPhys && !SystemZ::GRH32BitRegClass.contains(DstPhys) && SrcReg &&
TRI->isVirtualRegister(SrcReg) && DstPhys == VRM->getPhys(SrcReg))
NeedsCommute = (OpNum == 1);
else
MemOpcode = -1;
}
}
if (MemOpcode >= 0) { if (MemOpcode >= 0) {
unsigned NumOps = MI.getNumExplicitOperands(); if ((OpNum == NumOps - 1) || NeedsCommute) {
if (OpNum == NumOps - 1) {
const MCInstrDesc &MemDesc = get(MemOpcode); const MCInstrDesc &MemDesc = get(MemOpcode);
uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags); uint64_t AccessBytes = SystemZII::getAccessSize(MemDesc.TSFlags);
assert(AccessBytes != 0 && "Size of access should be known"); assert(AccessBytes != 0 && "Size of access should be known");
@ -1227,8 +1201,12 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
uint64_t Offset = Size - AccessBytes; uint64_t Offset = Size - AccessBytes;
MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt, MachineInstrBuilder MIB = BuildMI(*InsertPt->getParent(), InsertPt,
MI.getDebugLoc(), get(MemOpcode)); MI.getDebugLoc(), get(MemOpcode));
for (unsigned I = 0; I < OpNum; ++I) MIB.add(MI.getOperand(0));
MIB.add(MI.getOperand(I)); if (NeedsCommute)
MIB.add(MI.getOperand(2));
else
for (unsigned I = 1; I < OpNum; ++I)
MIB.add(MI.getOperand(I));
MIB.addFrameIndex(FrameIndex).addImm(Offset); MIB.addFrameIndex(FrameIndex).addImm(Offset);
if (MemDesc.TSFlags & SystemZII::HasIndex) if (MemDesc.TSFlags & SystemZII::HasIndex)
MIB.addReg(0); MIB.addReg(0);

View File

@ -141,6 +141,11 @@ enum FusedCompareType {
} // end namespace SystemZII } // end namespace SystemZII
// Opcode-mapping lookups in the SystemZ namespace.
// NOTE(review): presumably these are defined by the TableGen-generated
// InstrMapping tables of the same names -- confirm.
namespace SystemZ {
// Looks up the 2-operand form of a 3-operand instruction; callers compare
// the result against -1 to detect that no such form exists.
int getTwoOperandOpcode(uint16_t Opcode);
// Looks up the target memory instruction for a MemFoldPseudo opcode.
int getTargetMemOpcode(uint16_t Opcode);
}
class SystemZInstrInfo : public SystemZGenInstrInfo { class SystemZInstrInfo : public SystemZGenInstrInfo {
const SystemZRegisterInfo RI; const SystemZRegisterInfo RI;
SystemZSubtarget &STI; SystemZSubtarget &STI;
@ -248,7 +253,8 @@ public:
foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
ArrayRef<unsigned> Ops, ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, int FrameIndex, MachineBasicBlock::iterator InsertPt, int FrameIndex,
LiveIntervals *LIS = nullptr) const override; LiveIntervals *LIS = nullptr,
VirtRegMap *VRM = nullptr) const override;
MachineInstr *foldMemoryOperandImpl( MachineInstr *foldMemoryOperandImpl(
MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops, MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI, MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI,

View File

@ -916,11 +916,11 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
// Addition of memory. // Addition of memory.
defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, z_sadd, GR32, asextloadi16, 2>; defm AH : BinaryRXPair<"ah", 0x4A, 0xE37A, z_sadd, GR32, asextloadi16, 2>;
defm A : BinaryRXPair<"a", 0x5A, 0xE35A, z_sadd, GR32, load, 4>; defm A : BinaryRXPairAndPseudo<"a", 0x5A, 0xE35A, z_sadd, GR32, load, 4>;
def AGH : BinaryRXY<"agh", 0xE338, z_sadd, GR64, asextloadi16, 2>, def AGH : BinaryRXY<"agh", 0xE338, z_sadd, GR64, asextloadi16, 2>,
Requires<[FeatureMiscellaneousExtensions2]>; Requires<[FeatureMiscellaneousExtensions2]>;
def AGF : BinaryRXY<"agf", 0xE318, z_sadd, GR64, asextloadi32, 4>; def AGF : BinaryRXY<"agf", 0xE318, z_sadd, GR64, asextloadi32, 4>;
def AG : BinaryRXY<"ag", 0xE308, z_sadd, GR64, load, 8>; defm AG : BinaryRXYAndPseudo<"ag", 0xE308, z_sadd, GR64, load, 8>;
// Addition to memory. // Addition to memory.
def ASI : BinarySIY<"asi", 0xEB6A, add, imm32sx8>; def ASI : BinarySIY<"asi", 0xEB6A, add, imm32sx8>;
@ -958,9 +958,9 @@ let Defs = [CC] in {
Requires<[FeatureHighWord]>; Requires<[FeatureHighWord]>;
// Addition of memory. // Addition of memory.
defm AL : BinaryRXPair<"al", 0x5E, 0xE35E, z_uadd, GR32, load, 4>; defm AL : BinaryRXPairAndPseudo<"al", 0x5E, 0xE35E, z_uadd, GR32, load, 4>;
def ALGF : BinaryRXY<"algf", 0xE31A, z_uadd, GR64, azextloadi32, 4>; def ALGF : BinaryRXY<"algf", 0xE31A, z_uadd, GR64, azextloadi32, 4>;
def ALG : BinaryRXY<"alg", 0xE30A, z_uadd, GR64, load, 8>; defm ALG : BinaryRXYAndPseudo<"alg", 0xE30A, z_uadd, GR64, load, 8>;
// Addition to memory. // Addition to memory.
def ALSI : BinarySIY<"alsi", 0xEB6E, null_frag, imm32sx8>; def ALSI : BinarySIY<"alsi", 0xEB6E, null_frag, imm32sx8>;
@ -1003,11 +1003,11 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
// Subtraction of memory. // Subtraction of memory.
defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, z_ssub, GR32, asextloadi16, 2>; defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, z_ssub, GR32, asextloadi16, 2>;
defm S : BinaryRXPair<"s", 0x5B, 0xE35B, z_ssub, GR32, load, 4>; defm S : BinaryRXPairAndPseudo<"s", 0x5B, 0xE35B, z_ssub, GR32, load, 4>;
def SGH : BinaryRXY<"sgh", 0xE339, z_ssub, GR64, asextloadi16, 2>, def SGH : BinaryRXY<"sgh", 0xE339, z_ssub, GR64, asextloadi16, 2>,
Requires<[FeatureMiscellaneousExtensions2]>; Requires<[FeatureMiscellaneousExtensions2]>;
def SGF : BinaryRXY<"sgf", 0xE319, z_ssub, GR64, asextloadi32, 4>; def SGF : BinaryRXY<"sgf", 0xE319, z_ssub, GR64, asextloadi32, 4>;
def SG : BinaryRXY<"sg", 0xE309, z_ssub, GR64, load, 8>; defm SG : BinaryRXYAndPseudo<"sg", 0xE309, z_ssub, GR64, load, 8>;
} }
defm : SXB<z_ssub, GR64, SGFR>; defm : SXB<z_ssub, GR64, SGFR>;
@ -1055,9 +1055,9 @@ let Defs = [CC] in {
def SLGFI : BinaryRIL<"slgfi", 0xC24, z_usub, GR64, imm64zx32>; def SLGFI : BinaryRIL<"slgfi", 0xC24, z_usub, GR64, imm64zx32>;
// Subtraction of memory. // Subtraction of memory.
defm SL : BinaryRXPair<"sl", 0x5F, 0xE35F, z_usub, GR32, load, 4>; defm SL : BinaryRXPairAndPseudo<"sl", 0x5F, 0xE35F, z_usub, GR32, load, 4>;
def SLGF : BinaryRXY<"slgf", 0xE31B, z_usub, GR64, azextloadi32, 4>; def SLGF : BinaryRXY<"slgf", 0xE31B, z_usub, GR64, azextloadi32, 4>;
def SLG : BinaryRXY<"slg", 0xE30B, z_usub, GR64, load, 8>; defm SLG : BinaryRXYAndPseudo<"slg", 0xE30B, z_usub, GR64, load, 8>;
} }
defm : ZXB<z_usub, GR64, SLGFR>; defm : ZXB<z_usub, GR64, SLGFR>;
@ -1132,8 +1132,8 @@ let Defs = [CC] in {
// ANDs of memory. // ANDs of memory.
let CCValues = 0xC, CompareZeroCCMask = 0x8 in { let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
defm N : BinaryRXPair<"n", 0x54, 0xE354, and, GR32, load, 4>; defm N : BinaryRXPairAndPseudo<"n", 0x54, 0xE354, and, GR32, load, 4>;
def NG : BinaryRXY<"ng", 0xE380, and, GR64, load, 8>; defm NG : BinaryRXYAndPseudo<"ng", 0xE380, and, GR64, load, 8>;
} }
// AND to memory // AND to memory
@ -1189,8 +1189,8 @@ let Defs = [CC] in {
// ORs of memory. // ORs of memory.
let CCValues = 0xC, CompareZeroCCMask = 0x8 in { let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
defm O : BinaryRXPair<"o", 0x56, 0xE356, or, GR32, load, 4>; defm O : BinaryRXPairAndPseudo<"o", 0x56, 0xE356, or, GR32, load, 4>;
def OG : BinaryRXY<"og", 0xE381, or, GR64, load, 8>; defm OG : BinaryRXYAndPseudo<"og", 0xE381, or, GR64, load, 8>;
} }
// OR to memory // OR to memory
@ -1229,8 +1229,8 @@ let Defs = [CC] in {
// XORs of memory. // XORs of memory.
let CCValues = 0xC, CompareZeroCCMask = 0x8 in { let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
defm X : BinaryRXPair<"x",0x57, 0xE357, xor, GR32, load, 4>; defm X : BinaryRXPairAndPseudo<"x",0x57, 0xE357, xor, GR32, load, 4>;
def XG : BinaryRXY<"xg", 0xE382, xor, GR64, load, 8>; defm XG : BinaryRXYAndPseudo<"xg", 0xE382, xor, GR64, load, 8>;
} }
// XOR to memory // XOR to memory

View File

@ -0,0 +1,124 @@
//==---- SystemZPostRewrite.cpp - Select pseudos after RegAlloc ---*- C++ -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that is run immediately after VirtRegRewriter
// but before MachineCopyPropagation. The purpose is to lower pseudos to
// target instructions before any later pass might substitute a register for
// another.
//
//===----------------------------------------------------------------------===//
#include "SystemZ.h"
#include "SystemZInstrInfo.h"
#include "SystemZSubtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;
#define SYSTEMZ_POSTREWRITE_NAME "SystemZ Post Rewrite pass"
#define DEBUG_TYPE "systemz-postrewrite"
STATISTIC(MemFoldCopies, "Number of copies inserted before folded mem ops.");
namespace llvm {
void initializeSystemZPostRewritePass(PassRegistry&);
}
namespace {
/// Machine function pass that walks every instruction of a function and
/// lowers the pseudo instructions recognized by selectMI() to target
/// instructions.
class SystemZPostRewrite : public MachineFunctionPass {
public:
  static char ID;

  /// Registers the pass with the global pass registry on construction.
  SystemZPostRewrite() : MachineFunctionPass(ID) {
    initializeSystemZPostRewritePass(*PassRegistry::getPassRegistry());
  }

  /// Instruction info of the function currently being processed. It is
  /// assigned at the start of runOnMachineFunction(); it is explicitly null
  /// before that so that a premature use is not a read of an indeterminate
  /// pointer.
  const SystemZInstrInfo *TII = nullptr;

  /// Processes all basic blocks of Fn. Returns true if anything was changed.
  bool runOnMachineFunction(MachineFunction &Fn) override;

  StringRef getPassName() const override { return SYSTEMZ_POSTREWRITE_NAME; }

  /// Declares that all analyses are preserved, then defers to the base class.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

private:
  /// Lowers the instruction at MBBI if it is a pseudo handled by this pass.
  bool selectMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);

  /// Applies selectMI() to every instruction in MBB.
  bool selectMBB(MachineBasicBlock &MBB);
};
char SystemZPostRewrite::ID = 0;
} // end anonymous namespace
INITIALIZE_PASS(SystemZPostRewrite, "systemz-post-rewrite",
SYSTEMZ_POSTREWRITE_NAME, false, false)
/// Factory for the Post Rewrite pass: allocates a fresh SystemZPostRewrite
/// and hands it to the caller. TM is not used by this function.
FunctionPass *llvm::createSystemZPostRewritePass(SystemZTargetMachine &TM) {
  FunctionPass *PostRewrite = new SystemZPostRewrite();
  return PostRewrite;
}
/// Lower the instruction at MBBI if it is a pseudo that this pass handles.
/// Returns true when the instruction was rewritten and false when it was
/// left untouched. NextMBBI is not modified here.
bool SystemZPostRewrite::selectMI(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &Pseudo = *MBBI;

  // Note: If this could be done during regalloc in foldMemoryOperandImpl()
  // while also updating the LiveIntervals, there would be no need for the
  // MemFoldPseudo to begin with.
  const int MemOpc = SystemZ::getTargetMemOpcode(Pseudo.getOpcode());
  if (MemOpc == -1)
    return false; // Not a pseudo with a target memory opcode.

  // Switch to the target instruction, whose operands 0 and 1 are tied.
  Pseudo.setDesc(TII->get(MemOpc));
  Pseudo.tieOperands(0, 1);

  MachineOperand &LHS = Pseudo.getOperand(1);
  const unsigned DefReg = Pseudo.getOperand(0).getReg();
  const unsigned LHSReg = LHS.getReg();
  if (DefReg == LHSReg)
    return true; // Registers already agree; no copy is needed.

  // The registers differ: copy the source into the destination just before
  // the instruction and make the instruction read the destination instead.
  BuildMI(MBB, &Pseudo, Pseudo.getDebugLoc(), TII->get(SystemZ::COPY), DefReg)
      .addReg(LHSReg);
  LHS.setReg(DefReg);
  ++MemFoldCopies;
  return true;
}
/// Visit each instruction of MBB in order and give selectMI() the chance to
/// lower it. Returns true if at least one instruction was rewritten.
bool SystemZPostRewrite::selectMBB(MachineBasicBlock &MBB) {
  bool Changed = false;

  for (MachineBasicBlock::iterator It = MBB.begin(), End = MBB.end();
       It != End;) {
    // Compute the successor before the current instruction is processed.
    MachineBasicBlock::iterator Next = std::next(It);
    if (selectMI(MBB, It, Next))
      Changed = true;
    It = Next;
  }

  return Changed;
}
/// Pass entry point: caches the function's instruction info in TII and then
/// processes every basic block. Returns true if any block was changed.
bool SystemZPostRewrite::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());

  bool Changed = false;
  for (MachineBasicBlock &Block : MF)
    if (selectMBB(Block))
      Changed = true;
  return Changed;
}

View File

@ -81,7 +81,8 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
const VirtRegMap *VRM, const VirtRegMap *VRM,
const LiveRegMatrix *Matrix) const { const LiveRegMatrix *Matrix) const {
const MachineRegisterInfo *MRI = &MF.getRegInfo(); const MachineRegisterInfo *MRI = &MF.getRegInfo();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints( bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
VirtReg, Order, Hints, MF, VRM, Matrix); VirtReg, Order, Hints, MF, VRM, Matrix);
@ -138,6 +139,51 @@ SystemZRegisterInfo::getRegAllocationHints(unsigned VirtReg,
} }
} }
// The two-address hints below look up assigned physical registers through
// VRM, so without it only the base-implementation result is returned.
if (VRM == nullptr)
return BaseImplRetVal;
// Add any two address hints after any copy hints.
SmallSet<unsigned, 4> TwoAddrHints;
// Consider only those instructions referencing VirtReg for which a
// two-operand opcode exists.
for (auto &Use : MRI->reg_nodbg_instructions(VirtReg))
if (SystemZ::getTwoOperandOpcode(Use.getOpcode()) != -1) {
// VRRegMO: the operand that is VirtReg.
// OtherMO: the operand whose register VirtReg should preferably share.
// CommuMO: an additional candidate, set only when VirtReg is operand 0 and
// the instruction is commutable.
const MachineOperand *VRRegMO = nullptr;
const MachineOperand *OtherMO = nullptr;
const MachineOperand *CommuMO = nullptr;
if (VirtReg == Use.getOperand(0).getReg()) {
// VirtReg is operand 0: hint towards operand 1, and towards operand 2 as
// well if the instruction is commutable.
VRRegMO = &Use.getOperand(0);
OtherMO = &Use.getOperand(1);
if (Use.isCommutable())
CommuMO = &Use.getOperand(2);
} else if (VirtReg == Use.getOperand(1).getReg()) {
// VirtReg is operand 1: hint towards operand 0.
VRRegMO = &Use.getOperand(1);
OtherMO = &Use.getOperand(0);
} else if (VirtReg == Use.getOperand(2).getReg() && Use.isCommutable()) {
// VirtReg is operand 2 of a commutable instruction: hint towards operand 0.
VRRegMO = &Use.getOperand(2);
OtherMO = &Use.getOperand(0);
} else
continue;
// Records the physical register behind MO as a hint candidate, provided one
// is known, it is not reserved and it is not already present in Hints.
auto tryAddHint = [&](const MachineOperand *MO) -> void {
unsigned Reg = MO->getReg();
// A physical register is used as is; a virtual one is looked up in VRM.
unsigned PhysReg = isPhysicalRegister(Reg) ? Reg : VRM->getPhys(Reg);
if (PhysReg) {
// Adjust for subregister indices on either operand so that the candidate
// is a register of VirtReg's register class.
if (MO->getSubReg())
PhysReg = getSubReg(PhysReg, MO->getSubReg());
if (VRRegMO->getSubReg())
PhysReg = getMatchingSuperReg(PhysReg, VRRegMO->getSubReg(),
MRI->getRegClass(VirtReg));
if (!MRI->isReserved(PhysReg) && !is_contained(Hints, PhysReg))
TwoAddrHints.insert(PhysReg);
}
};
tryAddHint(OtherMO);
if (CommuMO)
tryAddHint(CommuMO);
}
// Append the collected candidates to Hints in the sequence given by Order.
for (MCPhysReg OrderReg : Order)
if (TwoAddrHints.count(OrderReg))
Hints.push_back(OrderReg);
return BaseImplRetVal; return BaseImplRetVal;
} }

View File

@ -299,6 +299,31 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
case SystemZ::VST64: case SystemZ::VST64:
Changed |= shortenOn0(MI, SystemZ::STD); Changed |= shortenOn0(MI, SystemZ::STD);
break; break;
// Any opcode not handled by an explicit case: convert to the two-operand
// opcode when one exists and the registers allow it.
default: {
int TwoOperandOpcode = SystemZ::getTwoOperandOpcode(MI.getOpcode());
// No two-operand counterpart: leave the instruction alone.
if (TwoOperandOpcode == -1)
break;
// Give up unless operand 0 already equals operand 1, or the instruction is
// commutable, operand 0 equals operand 2, and commuting operands 1 and 2
// succeeds.
if ((MI.getOperand(0).getReg() != MI.getOperand(1).getReg()) &&
(!MI.isCommutable() ||
MI.getOperand(0).getReg() != MI.getOperand(2).getReg() ||
!TII->commuteInstruction(MI, false, 1, 2)))
break;
// Switch to the two-operand opcode and tie operand 1 to operand 0.
MI.setDesc(TII->get(TwoOperandOpcode));
MI.tieOperands(0, 1);
if (TwoOperandOpcode == SystemZ::SLL ||
TwoOperandOpcode == SystemZ::SLA ||
TwoOperandOpcode == SystemZ::SRL ||
TwoOperandOpcode == SystemZ::SRA) {
// These shifts only use the low 6 bits of the shift count.
// NOTE(review): the mask below keeps the low 12 bits (0xfff), not 6;
// presumably that is the width of the immediate field in the two-operand
// encoding -- confirm.
MachineOperand &ImmMO = MI.getOperand(3);
ImmMO.setImm(ImmMO.getImm() & 0xfff);
}
Changed = true;
break;
}
} }
LiveRegs.stepBackward(MI); LiveRegs.stepBackward(MI);

View File

@ -183,6 +183,7 @@ public:
void addIRPasses() override; void addIRPasses() override;
bool addInstSelector() override; bool addInstSelector() override;
bool addILPOpts() override; bool addILPOpts() override;
void addPostRewrite() override;
void addPreSched2() override; void addPreSched2() override;
void addPreEmitPass() override; void addPreEmitPass() override;
}; };
@ -212,7 +213,16 @@ bool SystemZPassConfig::addILPOpts() {
return true; return true;
} }
/// Post-rewrite hook: schedules the SystemZ Post Rewrite pass.
void SystemZPassConfig::addPostRewrite() {
  auto &TM = getSystemZTargetMachine();
  addPass(createSystemZPostRewritePass(TM));
}
void SystemZPassConfig::addPreSched2() { void SystemZPassConfig::addPreSched2() {
// PostRewrite needs to be run at -O0 also (in which case addPostRewrite()
// is not called).
if (getOptLevel() == CodeGenOpt::None)
addPass(createSystemZPostRewritePass(getSystemZTargetMachine()));
addPass(createSystemZExpandPseudoPass(getSystemZTargetMachine())); addPass(createSystemZExpandPseudoPass(getSystemZTargetMachine()));
if (getOptLevel() != CodeGenOpt::None) if (getOptLevel() != CodeGenOpt::None)

View File

@ -4783,7 +4783,8 @@ MachineInstr *
X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
ArrayRef<unsigned> Ops, ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, MachineBasicBlock::iterator InsertPt,
int FrameIndex, LiveIntervals *LIS) const { int FrameIndex, LiveIntervals *LIS,
VirtRegMap *VRM) const {
// Check switch flag // Check switch flag
if (NoFusing) if (NoFusing)
return nullptr; return nullptr;

View File

@ -350,7 +350,8 @@ public:
foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
ArrayRef<unsigned> Ops, ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, int FrameIndex, MachineBasicBlock::iterator InsertPt, int FrameIndex,
LiveIntervals *LIS = nullptr) const override; LiveIntervals *LIS = nullptr,
VirtRegMap *VRM = nullptr) const override;
/// foldMemoryOperand - Same as the previous version except it allows folding /// foldMemoryOperand - Same as the previous version except it allows folding
/// of any load and store from / to any address, not just from a specific /// of any load and store from / to any address, not just from a specific

View File

@ -603,13 +603,13 @@ define void @f27() {
} }
; Test three-operand halfword immediate addition involving mixtures of low ; Test three-operand halfword immediate addition involving mixtures of low
; and high registers. RISBHG/AIH would be OK too, instead of AHIK/RISBHG. ; and high registers. AHIK/RISBHG would be OK too, instead of RISBHG/AIH.
define i32 @f28(i32 %old) { define i32 @f28(i32 %old) {
; CHECK-LABEL: f28: ; CHECK-LABEL: f28:
; CHECK: ahik [[REG1:%r[0-5]]], %r2, 14 ; CHECK: ahik [[REG1:%r[0-5]]], %r2, 14
; CHECK: stepa %r2, [[REG1]] ; CHECK: stepa %r2, [[REG1]]
; CHECK: ahik [[TMP:%r[0-5]]], [[REG1]], 254 ; CHECK: risbhg [[REG1]], [[REG1]], 0, 159, 32
; CHECK: risbhg [[REG2:%r[0-5]]], [[TMP]], 0, 159, 32 ; CHECK: aih [[REG1]], 254
; CHECK: stepb [[REG1]], [[REG2]] ; CHECK: stepb [[REG1]], [[REG2]]
; CHECK: risbhg [[REG3:%r[0-5]]], [[REG2]], 0, 159, 0 ; CHECK: risbhg [[REG3:%r[0-5]]], [[REG2]], 0, 159, 0
; CHECK: aih [[REG3]], 127 ; CHECK: aih [[REG3]], 127

View File

@ -9,10 +9,10 @@ define i32 @f1(i32 %a) {
; CHECK-LABEL: f1: ; CHECK-LABEL: f1:
; CHECK: popcnt %r0, %r2 ; CHECK: popcnt %r0, %r2
; CHECK: sllk %r1, %r0, 16 ; CHECK: sllk %r1, %r0, 16
; CHECK: ar %r1, %r0 ; CHECK: ar %r0, %r1
; CHECK: sllk %r2, %r1, 8 ; CHECK: sllk %r1, %r0, 8
; CHECK: ar %r2, %r1 ; CHECK: ar %r0, %r1
; CHECK: srl %r2, 24 ; CHECK: srlk %r2, %r0, 24
; CHECK: br %r14 ; CHECK: br %r14
%popcnt = call i32 @llvm.ctpop.i32(i32 %a) %popcnt = call i32 @llvm.ctpop.i32(i32 %a)
@ -23,9 +23,9 @@ define i32 @f2(i32 %a) {
; CHECK-LABEL: f2: ; CHECK-LABEL: f2:
; CHECK: llhr %r0, %r2 ; CHECK: llhr %r0, %r2
; CHECK: popcnt %r0, %r0 ; CHECK: popcnt %r0, %r0
; CHECK: risblg %r2, %r0, 16, 151, 8 ; CHECK: risblg %r1, %r0, 16, 151, 8
; CHECK: ar %r2, %r0 ; CHECK: ar %r0, %r1
; CHECK: srl %r2, 8 ; CHECK: srlk %r2, %r0, 8
; CHECK: br %r14 ; CHECK: br %r14
%and = and i32 %a, 65535 %and = and i32 %a, 65535
%popcnt = call i32 @llvm.ctpop.i32(i32 %and) %popcnt = call i32 @llvm.ctpop.i32(i32 %and)
@ -46,12 +46,12 @@ define i64 @f4(i64 %a) {
; CHECK-LABEL: f4: ; CHECK-LABEL: f4:
; CHECK: popcnt %r0, %r2 ; CHECK: popcnt %r0, %r2
; CHECK: sllg %r1, %r0, 32 ; CHECK: sllg %r1, %r0, 32
; CHECK: agr %r1, %r0 ; CHECK: agr %r0, %r1
; CHECK: sllg %r0, %r1, 16 ; CHECK: sllg %r1, %r0, 16
; CHECK: agr %r0, %r1 ; CHECK: agr %r0, %r1
; CHECK: sllg %r1, %r0, 8 ; CHECK: sllg %r1, %r0, 8
; CHECK: agr %r1, %r0 ; CHECK: agr %r0, %r1
; CHECK: srlg %r2, %r1, 56 ; CHECK: srlg %r2, %r0, 56
; CHECK: br %r14 ; CHECK: br %r14
%popcnt = call i64 @llvm.ctpop.i64(i64 %a) %popcnt = call i64 @llvm.ctpop.i64(i64 %a)
ret i64 %popcnt ret i64 %popcnt
@ -76,8 +76,8 @@ define i64 @f6(i64 %a) {
; CHECK: llghr %r0, %r2 ; CHECK: llghr %r0, %r2
; CHECK: popcnt %r0, %r0 ; CHECK: popcnt %r0, %r0
; CHECK: risbg %r1, %r0, 48, 183, 8 ; CHECK: risbg %r1, %r0, 48, 183, 8
; CHECK: agr %r1, %r0 ; CHECK: agr %r0, %r1
; CHECK: srlg %r2, %r1, 8 ; CHECK: srlg %r2, %r0, 8
; CHECK: br %r14 ; CHECK: br %r14
%and = and i64 %a, 65535 %and = and i64 %a, 65535
%popcnt = call i64 @llvm.ctpop.i64(i64 %and) %popcnt = call i64 @llvm.ctpop.i64(i64 %and)

View File

@ -1,7 +1,7 @@
; Test 64-bit addition in which the second operand is variable. ; Test 64-bit addition in which the second operand is variable.
; ;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s --check-prefixes=CHECK,Z10
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s --check-prefixes=CHECK,Z196
declare i64 @foo() declare i64 @foo()
@ -97,10 +97,12 @@ define i64 @f8(i64 %a, i64 %src, i64 %index) {
} }
; Check that additions of spilled values can use AG rather than AGR. ; Check that additions of spilled values can use AG rather than AGR.
; Note: Z196 is suboptimal with one unfolded reload.
define i64 @f9(i64 *%ptr0) { define i64 @f9(i64 *%ptr0) {
; CHECK-LABEL: f9: ; CHECK-LABEL: f9:
; CHECK: brasl %r14, foo@PLT ; CHECK: brasl %r14, foo@PLT
; CHECK: ag %r2, 160(%r15) ; Z10: ag %r2, 168(%r15)
; Z196: ag %r0, 168(%r15)
; CHECK: br %r14 ; CHECK: br %r14
%ptr1 = getelementptr i64, i64 *%ptr0, i64 2 %ptr1 = getelementptr i64, i64 *%ptr0, i64 2
%ptr2 = getelementptr i64, i64 *%ptr0, i64 4 %ptr2 = getelementptr i64, i64 *%ptr0, i64 4

View File

@ -0,0 +1,22 @@
; Test of subtraction that involves a constant as the first operand
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
; Check highest 16-bit signed int immediate value.
define i64 @f1(i64 %a) {
; CHECK-LABEL: f1:
; CHECK: lghi %r0, 32767
; CHECK: sgrk %r2, %r0, %r2
; CHECK: br %r14
%sub = sub i64 32767, %a
ret i64 %sub
}
; Check highest 32-bit signed int immediate value.
define i64 @f2(i64 %a) {
; CHECK-LABEL: f2:
; CHECK: lgfi %r0, 2147483647
; CHECK: sgrk %r2, %r0, %r2
; CHECK: br %r14
%sub = sub i64 2147483647, %a
ret i64 %sub
}

View File

@ -55,10 +55,9 @@ define i16 @f4(i16 %arg) {
; CHECK-LABEL: %bb.0: ; CHECK-LABEL: %bb.0:
; CHECK-NEXT: # kill ; CHECK-NEXT: # kill
; CHECK-NEXT: llghr %r0, %r2 ; CHECK-NEXT: llghr %r0, %r2
; CHECK-NEXT: flogr %r2, %r0 ; CHECK-NEXT: flogr %r0, %r0
; CHECK-NEXT: aghi %r2, -32 ; CHECK-NEXT: aghi %r0, -32
; CHECK-NEXT: ahi %r2, -16 ; CHECK-NEXT: ahik %r2, %r0, -16
; CHECK-NEXT: # kill
; CHECK-NEXT: br %r14 ; CHECK-NEXT: br %r14
%1 = tail call i16 @llvm.ctlz.i16(i16 %arg, i1 false) %1 = tail call i16 @llvm.ctlz.i16(i16 %arg, i1 false)
ret i16 %1 ret i16 %1
@ -69,10 +68,9 @@ define i16 @f5(i16 %arg) {
; CHECK-LABEL: %bb.0: ; CHECK-LABEL: %bb.0:
; CHECK-NEXT: # kill ; CHECK-NEXT: # kill
; CHECK-NEXT: llghr %r0, %r2 ; CHECK-NEXT: llghr %r0, %r2
; CHECK-NEXT: flogr %r2, %r0 ; CHECK-NEXT: flogr %r0, %r0
; CHECK-NEXT: aghi %r2, -32 ; CHECK-NEXT: aghi %r0, -32
; CHECK-NEXT: ahi %r2, -16 ; CHECK-NEXT: ahik %r2, %r0, -16
; CHECK-NEXT: # kill
; CHECK-NEXT: br %r14 ; CHECK-NEXT: br %r14
%1 = tail call i16 @llvm.ctlz.i16(i16 %arg, i1 true) %1 = tail call i16 @llvm.ctlz.i16(i16 %arg, i1 true)
ret i16 %1 ret i16 %1
@ -83,10 +81,9 @@ define i8 @f6(i8 %arg) {
; CHECK-LABEL: %bb.0: ; CHECK-LABEL: %bb.0:
; CHECK-NEXT: # kill ; CHECK-NEXT: # kill
; CHECK-NEXT: llgcr %r0, %r2 ; CHECK-NEXT: llgcr %r0, %r2
; CHECK-NEXT: flogr %r2, %r0 ; CHECK-NEXT: flogr %r0, %r0
; CHECK-NEXT: aghi %r2, -32 ; CHECK-NEXT: aghi %r0, -32
; CHECK-NEXT: ahi %r2, -24 ; CHECK-NEXT: ahik %r2, %r0, -24
; CHECK-NEXT: # kill
; CHECK-NEXT: br %r14 ; CHECK-NEXT: br %r14
%1 = tail call i8 @llvm.ctlz.i8(i8 %arg, i1 false) %1 = tail call i8 @llvm.ctlz.i8(i8 %arg, i1 false)
ret i8 %1 ret i8 %1
@ -97,10 +94,9 @@ define i8 @f7(i8 %arg) {
; CHECK-LABEL: %bb.0: ; CHECK-LABEL: %bb.0:
; CHECK-NEXT: # kill ; CHECK-NEXT: # kill
; CHECK-NEXT: llgcr %r0, %r2 ; CHECK-NEXT: llgcr %r0, %r2
; CHECK-NEXT: flogr %r2, %r0 ; CHECK-NEXT: flogr %r0, %r0
; CHECK-NEXT: aghi %r2, -32 ; CHECK-NEXT: aghi %r0, -32
; CHECK-NEXT: ahi %r2, -24 ; CHECK-NEXT: ahik %r2, %r0, -24
; CHECK-NEXT: # kill
; CHECK-NEXT: br %r14 ; CHECK-NEXT: br %r14
%1 = tail call i8 @llvm.ctlz.i8(i8 %arg, i1 true) %1 = tail call i8 @llvm.ctlz.i8(i8 %arg, i1 true)
ret i8 %1 ret i8 %1

View File

@ -75,17 +75,17 @@ define void @fun2(<8 x i32> %src, <8 x i31>* %p)
; CHECK-NEXT: stmg %r14, %r15, 112(%r15) ; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
; CHECK-NEXT: .cfi_offset %r14, -48 ; CHECK-NEXT: .cfi_offset %r14, -48
; CHECK-NEXT: .cfi_offset %r15, -40 ; CHECK-NEXT: .cfi_offset %r15, -40
; CHECK-NEXT: vlgvf %r3, %v26, 1 ; CHECK-DAG: vlgvf [[REG11:%r[0-9]+]], %v26, 1
; CHECK-NEXT: vlgvf %r1, %v26, 2 ; CHECK-DAG: vlgvf [[REG12:%r[0-9]+]], %v26, 2
; CHECK-NEXT: risbgn %r4, %r3, 0, 129, 62 ; CHECK-DAG: risbgn [[REG13:%r[0-9]+]], [[REG11]], 0, 129, 62
; CHECK-NEXT: rosbg %r4, %r1, 2, 32, 31 ; CHECK-DAG: rosbg [[REG13]], [[REG12]], 2, 32, 31
; CHECK-DAG: vlgvf %r0, %v26, 3 ; CHECK-DAG: vlgvf %r0, %v26, 3
; CHECK-DAG: rosbg %r4, %r0, 33, 63, 0 ; CHECK-DAG: rosbg [[REG13]], %r0, 33, 63, 0
; CHECK-DAG: stc %r0, 30(%r2) ; CHECK-DAG: stc %r0, 30(%r2)
; CHECK-DAG: srl %r0, 8 ; CHECK-DAG: srlk %r1, %r0, 8
; CHECK-DAG: vlgvf [[REG0:%r[0-9]+]], %v24, 1 ; CHECK-DAG: vlgvf [[REG0:%r[0-9]+]], %v24, 1
; CHECK-DAG: vlgvf [[REG1:%r[0-9]+]], %v24, 0 ; CHECK-DAG: vlgvf [[REG1:%r[0-9]+]], %v24, 0
; CHECK-DAG: sth %r0, 28(%r2) ; CHECK-DAG: sth %r1, 28(%r2)
; CHECK-DAG: vlgvf [[REG2:%r[0-9]+]], %v24, 2 ; CHECK-DAG: vlgvf [[REG2:%r[0-9]+]], %v24, 2
; CHECK-DAG: risbgn [[REG3:%r[0-9]+]], [[REG0]], 0, 133, 58 ; CHECK-DAG: risbgn [[REG3:%r[0-9]+]], [[REG0]], 0, 133, 58
; CHECK-DAG: rosbg [[REG3]], [[REG2]], 6, 36, 27 ; CHECK-DAG: rosbg [[REG3]], [[REG2]], 6, 36, 27
@ -95,18 +95,18 @@ define void @fun2(<8 x i32> %src, <8 x i31>* %p)
; CHECK-DAG: rosbg [[REG3]], [[REG5]], 37, 63, 60 ; CHECK-DAG: rosbg [[REG3]], [[REG5]], 37, 63, 60
; CHECK-DAG: sllg [[REG6:%r[0-9]+]], [[REG4]], 8 ; CHECK-DAG: sllg [[REG6:%r[0-9]+]], [[REG4]], 8
; CHECK-DAG: rosbg [[REG6]], [[REG3]], 56, 63, 8 ; CHECK-DAG: rosbg [[REG6]], [[REG3]], 56, 63, 8
; CHECK-NEXT: stg [[REG6]], 0(%r2) ; CHECK-DAG: stg [[REG6]], 0(%r2)
; CHECK-NEXT: srlg [[REG7:%r[0-9]+]], %r4, 24 ; CHECK-DAG: srlg [[REG7:%r[0-9]+]], [[REG13]], 24
; CHECK-NEXT: st [[REG7]], 24(%r2) ; CHECK-DAG: st [[REG7]], 24(%r2)
; CHECK-NEXT: vlgvf [[REG8:%r[0-9]+]], %v26, 0 ; CHECK-DAG: vlgvf [[REG8:%r[0-9]+]], %v26, 0
; CHECK-NEXT: risbgn [[REG10:%r[0-9]+]], [[REG5]], 0, 131, 60 ; CHECK-DAG: risbgn [[REG10:%r[0-9]+]], [[REG5]], 0, 131, 60
; CHECK-NEXT: rosbg [[REG10]], [[REG8]], 4, 34, 29 ; CHECK-DAG: rosbg [[REG10]], [[REG8]], 4, 34, 29
; CHECK-NEXT: sllg [[REG9:%r[0-9]+]], [[REG3]], 8 ; CHECK-DAG: sllg [[REG9:%r[0-9]+]], [[REG3]], 8
; CHECK-NEXT: rosbg [[REG10]], %r3, 35, 63, 62 ; CHECK-DAG: rosbg [[REG10]], [[REG11]], 35, 63, 62
; CHECK-NEXT: rosbg [[REG9]], [[REG10]], 56, 63, 8 ; CHECK-DAG: rosbg [[REG9]], [[REG10]], 56, 63, 8
; CHECK-NEXT: stg [[REG9]], 8(%r2) ; CHECK-DAG: stg [[REG9]], 8(%r2)
; CHECK-NEXT: sllg %r0, [[REG10]], 8 ; CHECK-DAG: sllg %r0, [[REG10]], 8
; CHECK-NEXT: rosbg %r0, %r4, 56, 63, 8 ; CHECK-DAG: rosbg %r0, [[REG13]], 56, 63, 8
; CHECK-NEXT: stg %r0, 16(%r2) ; CHECK-NEXT: stg %r0, 16(%r2)
; CHECK-NEXT: lmg %r14, %r15, 112(%r15) ; CHECK-NEXT: lmg %r14, %r15, 112(%r15)
; CHECK-NEXT: br %r14 ; CHECK-NEXT: br %r14

View File

@ -408,7 +408,7 @@ define i32 @f9(double %scalar0, double %scalar1, double %scalar2,
; CHECK-NOT: vmrh ; CHECK-NOT: vmrh
; CHECK: ar {{%r[0-5]}}, ; CHECK: ar {{%r[0-5]}},
; CHECK: ar {{%r[0-5]}}, ; CHECK: ar {{%r[0-5]}},
; CHECK: or %r2, ; CHECK: ork %r2,
; CHECK: br %r14 ; CHECK: br %r14
%vec0 = insertelement <2 x double> undef, double %scalar0, i32 0 %vec0 = insertelement <2 x double> undef, double %scalar0, i32 0
%vec1 = insertelement <2 x double> undef, double %scalar1, i32 0 %vec1 = insertelement <2 x double> undef, double %scalar1, i32 0