[SystemZ] Improve use of conditional instructions

This patch moves formation of LOC-type instructions from (late)
IfConversion to the early if-conversion pass, and in some cases
additionally creates them directly from select instructions
during DAG instruction selection.
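
As an illustration (a minimal sketch, not part of the patch text; the
function name is made up, and the expected code shape below follows the
new conditional-move tests, with registers only indicative), a simple
select such as

  define i32 @sel(i32 %x, i32 %y) {
    %cond = icmp ne i32 %x, 0
    %res = select i1 %cond, i32 %y, i32 42
    ret i32 %res
  }

now becomes a compare plus a load-immediate-on-condition on z13,
roughly

  chi     %r2, 0
  lochie  %r3, 42

rather than a compare-and-branch diamond.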

To make early if-conversion work, the patch implements the
canInsertSelect / insertSelect callbacks.  It also implements
the commuteInstructionImpl and FoldImmediate callbacks to
enable generation of the full range of LOC instructions.
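
For example (a sketch following the new LOCR/LOCHI tests added by this
patch), a diamond like

  define i32 @f5(i32 %a, i32 %b, i32 %limit) {
  entry:
    %cond = icmp ult i32 %limit, 42
    br i1 %cond, label %if.then, label %return
  if.then:
    br label %return
  return:
    %res = phi i32 [ %a, %if.then ], [ %b, %entry ]
    ret i32 %res
  }

is now flattened by early if-conversion into

  clfi  %r4, 41
  locrh %r2, %r3

and, when one of the inputs is a small constant materialized by LHI,
the PeepholeOptimizer's FoldImmediate (commuting the LOC operands first
if necessary) further rewrites the LOCR into a LOCHI.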

Finally, the patch adds support for all instructions of the
load-store-on-condition-2 facility, which allows using LOC
instructions also for high registers.
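
For instance (a sketch modelled on the new high-register tests, which
use -no-integrated-as and "h" register constraints to pin values into
high GR32 halves; the function name and the "step" mnemonics are dummy
assembly used only for illustration), a select between two high-register
values

  define void @sel_high(i32 %limit) {
    %a = call i32 asm sideeffect "stepa $0", "=h"()
    %b = call i32 asm sideeffect "stepb $0", "=h"()
    %cond = icmp ult i32 %limit, 42
    %res = select i1 %cond, i32 %a, i32 %b
    call void asm sideeffect "stepc $0", "h"(i32 %res)
    ret void
  }

is now expected to compile to a single LOCFHR (e.g. "locfhrl") instead
of a compare-and-branch sequence.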

Due to the use of the GRX32 register class to enable high registers,
we now also have to handle the cases where there is still no single
hardware instruction (a conditional move from a low register to a high
register or vice versa).  These are converted back to a branch sequence
after register allocation.  Since the expandPostRAPseudo callback is not
allowed to create new basic blocks, this requires a simple new pass,
modelled after the ARM/AArch64 ExpandPseudos passes.
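
A sketch of that remaining corner case (again with "h"/"r" constraints
pinning one value to a high and the other to a low register; the
function name, label and register names below are placeholders):

  define void @sel_mixed(i32 %limit) {
    %a = call i32 asm sideeffect "stepa $0", "=r"()
    %b = call i32 asm sideeffect "stepb $0", "=h"()
    %cond = icmp ult i32 %limit, 42
    %res = select i1 %cond, i32 %a, i32 %b
    call void asm sideeffect "stepc $0", "h"(i32 %res)
    ret void
  }

Here no LOCR or LOCFHR variant applies, so the LOCRMux pseudo created
for the select is expanded by the new pass into a branch around a
register move, roughly

  clijhe  %r2, 42, .Lskip
  risbhg  %rH, %rL, 0, 159, 32
.Lskip: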

Overall, this patch causes significantly more LOC-type instructions
to be used, and results in a measurable performance improvement.

llvm-svn: 288028
Ulrich Weigand 2016-11-28 13:34:08 +00:00
parent 79724fc0ae
commit 524f276c74
26 changed files with 1803 additions and 171 deletions


@ -16,6 +16,7 @@ add_llvm_target(SystemZCodeGen
SystemZCallingConv.cpp
SystemZConstantPoolValue.cpp
SystemZElimCompare.cpp
SystemZExpandPseudo.cpp
SystemZFrameLowering.cpp
SystemZHazardRecognizer.cpp
SystemZISelDAGToDAG.cpp


@ -175,6 +175,7 @@ static inline bool isImmHF(uint64_t Val) {
FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM,
CodeGenOpt::Level OptLevel);
FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM);
FunctionPass *createSystemZExpandPseudoPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM);


@ -0,0 +1,153 @@
//==-- SystemZExpandPseudo.cpp - Expand pseudo instructions -------*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling and other late optimizations. This
// pass should be run after register allocation but before the post-regalloc
// scheduling pass.
//
//===----------------------------------------------------------------------===//
#include "SystemZ.h"
#include "SystemZInstrInfo.h"
#include "SystemZSubtarget.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;
#define SYSTEMZ_EXPAND_PSEUDO_NAME "SystemZ pseudo instruction expansion pass"
namespace llvm {
void initializeSystemZExpandPseudoPass(PassRegistry&);
}
namespace {
class SystemZExpandPseudo : public MachineFunctionPass {
public:
static char ID;
SystemZExpandPseudo() : MachineFunctionPass(ID) {
initializeSystemZExpandPseudoPass(*PassRegistry::getPassRegistry());
}
const SystemZInstrInfo *TII;
bool runOnMachineFunction(MachineFunction &Fn) override;
StringRef getPassName() const override { return SYSTEMZ_EXPAND_PSEUDO_NAME; }
private:
bool expandMBB(MachineBasicBlock &MBB);
bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI);
bool expandLOCRMux(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI);
};
char SystemZExpandPseudo::ID = 0;
}
INITIALIZE_PASS(SystemZExpandPseudo, "systemz-expand-pseudo",
SYSTEMZ_EXPAND_PSEUDO_NAME, false, false)
/// \brief Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createSystemZExpandPseudoPass(SystemZTargetMachine &TM) {
return new SystemZExpandPseudo();
}
// MI is a load-register-on-condition pseudo instruction that could not be
// handled as a single hardware instruction. Replace it by a branch sequence.
bool SystemZExpandPseudo::expandLOCRMux(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI) {
MachineFunction &MF = *MBB.getParent();
const BasicBlock *BB = MBB.getBasicBlock();
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();
unsigned DestReg = MI.getOperand(0).getReg();
unsigned SrcReg = MI.getOperand(2).getReg();
unsigned CCValid = MI.getOperand(3).getImm();
unsigned CCMask = MI.getOperand(4).getImm();
LivePhysRegs LiveRegs(&TII->getRegisterInfo());
LiveRegs.addLiveOuts(MBB);
for (auto I = std::prev(MBB.end()); I != MBBI; --I)
LiveRegs.stepBackward(*I);
// Splice MBB at MI, moving the rest of the block into RestMBB.
MachineBasicBlock *RestMBB = MF.CreateMachineBasicBlock(BB);
MF.insert(std::next(MachineFunction::iterator(MBB)), RestMBB);
RestMBB->splice(RestMBB->begin(), &MBB, MI, MBB.end());
RestMBB->transferSuccessors(&MBB);
for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I)
RestMBB->addLiveIn(*I);
// Create a new block MoveMBB to hold the move instruction.
MachineBasicBlock *MoveMBB = MF.CreateMachineBasicBlock(BB);
MF.insert(std::next(MachineFunction::iterator(MBB)), MoveMBB);
MoveMBB->addLiveIn(SrcReg);
for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I)
MoveMBB->addLiveIn(*I);
// At the end of MBB, create a conditional branch to RestMBB if the
// condition is false, otherwise fall through to MoveMBB.
BuildMI(&MBB, DL, TII->get(SystemZ::BRC))
.addImm(CCValid).addImm(CCMask ^ CCValid).addMBB(RestMBB);
MBB.addSuccessor(RestMBB);
MBB.addSuccessor(MoveMBB);
// In MoveMBB, emit an instruction to move SrcReg into DestReg,
// then fall through to RestMBB.
TII->copyPhysReg(*MoveMBB, MoveMBB->end(), DL, DestReg, SrcReg,
MI.getOperand(2).isKill());
MoveMBB->addSuccessor(RestMBB);
NextMBBI = MBB.end();
MI.eraseFromParent();
return true;
}
/// \brief If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
bool SystemZExpandPseudo::expandMI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI) {
MachineInstr &MI = *MBBI;
switch (MI.getOpcode()) {
case SystemZ::LOCRMux:
return expandLOCRMux(MBB, MBBI, NextMBBI);
default:
break;
}
return false;
}
/// \brief Iterate over the instructions in basic block MBB and expand any
/// pseudo instructions. Return true if anything was modified.
bool SystemZExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
bool Modified = false;
MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
while (MBBI != E) {
MachineBasicBlock::iterator NMBBI = std::next(MBBI);
Modified |= expandMI(MBB, MBBI, NMBBI);
MBBI = NMBBI;
}
return Modified;
}
bool SystemZExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
TII = static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
bool Modified = false;
for (auto &MBB : MF)
Modified |= expandMBB(MBB);
return Modified;
}


@ -1296,8 +1296,14 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
SDValue Op0 = Node->getOperand(0);
SDValue Op1 = Node->getOperand(1);
// Prefer to put any load first, so that it can be matched as a
// conditional load.
if (Op1.getOpcode() == ISD::LOAD && Op0.getOpcode() != ISD::LOAD) {
// conditional load. Likewise for constants in range for LOCHI.
if ((Op1.getOpcode() == ISD::LOAD && Op0.getOpcode() != ISD::LOAD) ||
(Subtarget->hasLoadStoreOnCond2() &&
Node->getValueType(0).isInteger() &&
Op1.getOpcode() == ISD::Constant &&
isInt<16>(cast<ConstantSDNode>(Op1)->getSExtValue()) &&
!(Op0.getOpcode() == ISD::Constant &&
isInt<16>(cast<ConstantSDNode>(Op0)->getSExtValue())))) {
SDValue CCValid = Node->getOperand(2);
SDValue CCMask = Node->getOperand(3);
uint64_t ConstCCValid =


@ -5224,7 +5224,8 @@ static unsigned forceReg(MachineInstr &MI, MachineOperand &Base,
// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
MachineBasicBlock *
SystemZTargetLowering::emitSelect(MachineInstr &MI,
MachineBasicBlock *MBB) const {
MachineBasicBlock *MBB,
unsigned LOCROpcode) const {
const SystemZInstrInfo *TII =
static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
@ -5235,6 +5236,15 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI,
unsigned CCMask = MI.getOperand(4).getImm();
DebugLoc DL = MI.getDebugLoc();
// Use LOCROpcode if possible.
if (LOCROpcode && Subtarget.hasLoadStoreOnCond()) {
BuildMI(*MBB, MI, DL, TII->get(LOCROpcode), DestReg)
.addReg(FalseReg).addReg(TrueReg)
.addImm(CCValid).addImm(CCMask);
MI.eraseFromParent();
return MBB;
}
MachineBasicBlock *StartMBB = MBB;
MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
@ -6020,12 +6030,16 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *MBB) const {
switch (MI.getOpcode()) {
case SystemZ::Select32Mux:
return emitSelect(MI, MBB,
Subtarget.hasLoadStoreOnCond2()? SystemZ::LOCRMux : 0);
case SystemZ::Select32:
case SystemZ::SelectF32:
return emitSelect(MI, MBB, SystemZ::LOCR);
case SystemZ::Select64:
return emitSelect(MI, MBB, SystemZ::LOCGR);
case SystemZ::SelectF32:
case SystemZ::SelectF64:
case SystemZ::SelectF128:
return emitSelect(MI, MBB);
return emitSelect(MI, MBB, 0);
case SystemZ::CondStore8Mux:
return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
@ -6035,6 +6049,10 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
case SystemZ::CondStore16MuxInv:
return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
case SystemZ::CondStore32Mux:
return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
case SystemZ::CondStore32MuxInv:
return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
case SystemZ::CondStore8:
return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
case SystemZ::CondStore8Inv:


@ -561,7 +561,8 @@ private:
MachineBasicBlock *Target) const;
// Implement EmitInstrWithCustomInserter for individual operation types.
MachineBasicBlock *emitSelect(MachineInstr &MI, MachineBasicBlock *BB) const;
MachineBasicBlock *emitSelect(MachineInstr &MI, MachineBasicBlock *BB,
unsigned LOCROpcode) const;
MachineBasicBlock *emitCondStore(MachineInstr &MI, MachineBasicBlock *BB,
unsigned StoreOpcode, unsigned STOCOpcode,
bool Invert) const;


@ -2201,77 +2201,6 @@ class UnaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let OpType = "reg";
}
// These instructions are generated by if conversion. The old value of R1
// is added as an implicit use.
class CondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
RegisterOperand cls2>
: InstRRFc<opcode, (outs cls1:$R1), (ins cls2:$R2, cond4:$valid, cond4:$M3),
mnemonic#"$M3\t$R1, $R2", []> {
let CCMaskLast = 1;
}
class CondUnaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
Immediate imm>
: InstRIEg<opcode, (outs cls:$R1), (ins imm:$I2, cond4:$valid, cond4:$M3),
mnemonic#"$M3\t$R1, $I2", []> {
let CCMaskLast = 1;
}
// Like CondUnaryRRF, but used for the raw assembly form. The condition-code
// mask is the third operand rather than being part of the mnemonic.
class AsmCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
RegisterOperand cls2>
: InstRRFc<opcode, (outs cls1:$R1),
(ins cls1:$R1src, cls2:$R2, imm32zx4:$M3),
mnemonic#"\t$R1, $R2, $M3", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
}
class AsmCondUnaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
Immediate imm>
: InstRIEg<opcode, (outs cls:$R1),
(ins cls:$R1src, imm:$I2, imm32zx4:$M3),
mnemonic#"\t$R1, $I2, $M3", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
}
// Like CondUnaryRRF, but with a fixed CC mask.
class FixedCondUnaryRRF<CondVariant V, string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2>
: InstRRFc<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2),
mnemonic#V.suffix#"\t$R1, $R2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
let isAsmParserOnly = V.alternate;
let M3 = V.ccmask;
}
class FixedCondUnaryRIE<CondVariant V, string mnemonic, bits<16> opcode,
RegisterOperand cls, Immediate imm>
: InstRIEg<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
mnemonic#V.suffix#"\t$R1, $I2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
let isAsmParserOnly = V.alternate;
let M3 = V.ccmask;
}
multiclass CondUnaryRRFPair<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2> {
let isCodeGenOnly = 1 in
def "" : CondUnaryRRF<mnemonic, opcode, cls1, cls2>;
def Asm : AsmCondUnaryRRF<mnemonic, opcode, cls1, cls2>;
}
multiclass CondUnaryRIEPair<string mnemonic, bits<16> opcode,
RegisterOperand cls, Immediate imm> {
let isCodeGenOnly = 1 in
def "" : CondUnaryRIE<mnemonic, opcode, cls, imm>;
def Asm : AsmCondUnaryRIE<mnemonic, opcode, cls, imm>;
}
class UnaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, Immediate imm>
: InstRIa<opcode, (outs cls:$R1), (ins imm:$I2),
@ -2578,6 +2507,45 @@ class BinaryRRFe<string mnemonic, bits<16> opcode, RegisterOperand cls1,
let M4 = 0;
}
class CondBinaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
RegisterOperand cls2>
: InstRRFc<opcode, (outs cls1:$R1),
(ins cls1:$R1src, cls2:$R2, cond4:$valid, cond4:$M3),
mnemonic#"$M3\t$R1, $R2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
let CCMaskLast = 1;
}
// Like CondBinaryRRF, but used for the raw assembly form. The condition-code
// mask is the third operand rather than being part of the mnemonic.
class AsmCondBinaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
RegisterOperand cls2>
: InstRRFc<opcode, (outs cls1:$R1),
(ins cls1:$R1src, cls2:$R2, imm32zx4:$M3),
mnemonic#"\t$R1, $R2, $M3", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
}
// Like CondBinaryRRF, but with a fixed CC mask.
class FixedCondBinaryRRF<CondVariant V, string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2>
: InstRRFc<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2),
mnemonic#V.suffix#"\t$R1, $R2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
let isAsmParserOnly = V.alternate;
let M3 = V.ccmask;
}
multiclass CondBinaryRRFPair<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2> {
let isCodeGenOnly = 1 in
def "" : CondBinaryRRF<mnemonic, opcode, cls1, cls2>;
def Asm : AsmCondBinaryRRF<mnemonic, opcode, cls1, cls2>;
}
class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, Immediate imm>
: InstRIa<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
@ -2605,6 +2573,47 @@ multiclass BinaryRIAndK<string mnemonic, bits<12> opcode1, bits<16> opcode2,
}
}
class CondBinaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
Immediate imm>
: InstRIEg<opcode, (outs cls:$R1),
(ins cls:$R1src, imm:$I2, cond4:$valid, cond4:$M3),
mnemonic#"$M3\t$R1, $I2",
[(set cls:$R1, (z_select_ccmask imm:$I2, cls:$R1src,
cond4:$valid, cond4:$M3))]> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
let CCMaskLast = 1;
}
// Like CondBinaryRIE, but used for the raw assembly form. The condition-code
// mask is the third operand rather than being part of the mnemonic.
class AsmCondBinaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
Immediate imm>
: InstRIEg<opcode, (outs cls:$R1),
(ins cls:$R1src, imm:$I2, imm32zx4:$M3),
mnemonic#"\t$R1, $I2, $M3", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
}
// Like CondBinaryRIE, but with a fixed CC mask.
class FixedCondBinaryRIE<CondVariant V, string mnemonic, bits<16> opcode,
RegisterOperand cls, Immediate imm>
: InstRIEg<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
mnemonic#V.suffix#"\t$R1, $I2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
let isAsmParserOnly = V.alternate;
let M3 = V.ccmask;
}
multiclass CondBinaryRIEPair<string mnemonic, bits<16> opcode,
RegisterOperand cls, Immediate imm> {
let isCodeGenOnly = 1 in
def "" : CondBinaryRIE<mnemonic, opcode, cls, imm>;
def Asm : AsmCondBinaryRIE<mnemonic, opcode, cls, imm>;
}
class BinaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, Immediate imm>
: InstRILa<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
@ -3644,6 +3653,54 @@ class CompareRXYPseudo<SDPatternOperator operator, RegisterOperand cls,
let AccessBytes = bytes;
}
// Like CondBinaryRRF, but expanded after RA depending on the choice of
// register.
class CondBinaryRRFPseudo<RegisterOperand cls1, RegisterOperand cls2>
: Pseudo<(outs cls1:$R1),
(ins cls1:$R1src, cls2:$R2, cond4:$valid, cond4:$M3), []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
let CCMaskLast = 1;
}
// Like CondBinaryRIE, but expanded after RA depending on the choice of
// register.
class CondBinaryRIEPseudo<RegisterOperand cls, Immediate imm>
: Pseudo<(outs cls:$R1),
(ins cls:$R1src, imm:$I2, cond4:$valid, cond4:$M3),
[(set cls:$R1, (z_select_ccmask imm:$I2, cls:$R1src,
cond4:$valid, cond4:$M3))]> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
let CCMaskLast = 1;
}
// Like CondUnaryRSY, but expanded after RA depending on the choice of
// register.
class CondUnaryRSYPseudo<SDPatternOperator operator, RegisterOperand cls,
bits<5> bytes, AddressingMode mode = bdaddr20only>
: Pseudo<(outs cls:$R1),
(ins cls:$R1src, mode:$BD2, cond4:$valid, cond4:$R3),
[(set cls:$R1,
(z_select_ccmask (operator mode:$BD2), cls:$R1src,
cond4:$valid, cond4:$R3))]> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
let mayLoad = 1;
let AccessBytes = bytes;
let CCMaskLast = 1;
}
// Like CondStoreRSY, but expanded after RA depending on the choice of
// register.
class CondStoreRSYPseudo<RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdaddr20only>
: Pseudo<(outs), (ins cls:$R1, mode:$BD2, cond4:$valid, cond4:$R3), []> {
let mayStore = 1;
let AccessBytes = bytes;
let CCMaskLast = 1;
}
// Like StoreRXY, but expanded after RA depending on the choice of register.
class StoreRXYPseudo<SDPatternOperator operator, RegisterOperand cls,
bits<5> bytes, AddressingMode mode = bdxaddr20only>


@ -149,6 +149,37 @@ void SystemZInstrInfo::expandRXYPseudo(MachineInstr &MI, unsigned LowOpcode,
MI.setDesc(get(Opcode));
}
// MI is a load-on-condition pseudo instruction with a single register
// (source or destination) operand. Replace it with LowOpcode if the
// register is a low GR32 and HighOpcode if the register is a high GR32.
void SystemZInstrInfo::expandLOCPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned HighOpcode) const {
unsigned Reg = MI.getOperand(0).getReg();
unsigned Opcode = isHighReg(Reg) ? HighOpcode : LowOpcode;
MI.setDesc(get(Opcode));
}
// MI is a load-register-on-condition pseudo instruction. Replace it with
// LowOpcode if source and destination are both low GR32s and HighOpcode if
// source and destination are both high GR32s.
void SystemZInstrInfo::expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned HighOpcode) const {
unsigned DestReg = MI.getOperand(0).getReg();
unsigned SrcReg = MI.getOperand(2).getReg();
bool DestIsHigh = isHighReg(DestReg);
bool SrcIsHigh = isHighReg(SrcReg);
if (!DestIsHigh && !SrcIsHigh)
MI.setDesc(get(LowOpcode));
else if (DestIsHigh && SrcIsHigh)
MI.setDesc(get(HighOpcode));
// If we were unable to implement the pseudo with a single instruction, we
// need to convert it back into a branch sequence. This cannot be done here
// since the caller of expandPostRAPseudo does not handle changes to the CFG
correctly. This change is deferred to the SystemZExpandPseudo pass.
}
// MI is an RR-style pseudo instruction that zero-extends the low Size bits
// of one GRX32 into another. Replace it with LowOpcode if both operands
// are low registers, otherwise use RISB[LH]G.
@ -222,6 +253,36 @@ void SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB,
.addImm(32 - Size).addImm(128 + 31).addImm(Rotate);
}
MachineInstr *SystemZInstrInfo::commuteInstructionImpl(MachineInstr &MI,
bool NewMI,
unsigned OpIdx1,
unsigned OpIdx2) const {
auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & {
if (NewMI)
return *MI.getParent()->getParent()->CloneMachineInstr(&MI);
return MI;
};
switch (MI.getOpcode()) {
case SystemZ::LOCRMux:
case SystemZ::LOCFHR:
case SystemZ::LOCR:
case SystemZ::LOCGR: {
auto &WorkingMI = cloneIfNew(MI);
// Invert condition.
unsigned CCValid = WorkingMI.getOperand(3).getImm();
unsigned CCMask = WorkingMI.getOperand(4).getImm();
WorkingMI.getOperand(4).setImm(CCMask ^ CCValid);
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
default:
return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
}
}
// If MI is a simple load or store for a frame object, return the register
// it loads or stores and set FrameIndex to the index of the frame object.
// Return 0 otherwise.
@ -525,30 +586,128 @@ bool SystemZInstrInfo::optimizeCompareInstr(
removeIPMBasedCompare(Compare, SrcReg, MRI, &RI);
}
// If Opcode is a move that has a conditional variant, return that variant,
// otherwise return 0.
static unsigned getConditionalMove(unsigned Opcode) {
switch (Opcode) {
case SystemZ::LR: return SystemZ::LOCR;
case SystemZ::LGR: return SystemZ::LOCGR;
default: return 0;
bool SystemZInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
ArrayRef<MachineOperand> Pred,
unsigned TrueReg, unsigned FalseReg,
int &CondCycles, int &TrueCycles,
int &FalseCycles) const {
// Not all subtargets have LOCR instructions.
if (!STI.hasLoadStoreOnCond())
return false;
if (Pred.size() != 2)
return false;
// Check register classes.
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
const TargetRegisterClass *RC =
RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
if (!RC)
return false;
// We have LOCR instructions for 32 and 64 bit general purpose registers.
if ((STI.hasLoadStoreOnCond2() &&
SystemZ::GRX32BitRegClass.hasSubClassEq(RC)) ||
SystemZ::GR32BitRegClass.hasSubClassEq(RC) ||
SystemZ::GR64BitRegClass.hasSubClassEq(RC)) {
CondCycles = 2;
TrueCycles = 2;
FalseCycles = 2;
return true;
}
// Can't do anything else.
return false;
}
static unsigned getConditionalLoadImmediate(unsigned Opcode) {
switch (Opcode) {
case SystemZ::LHI: return SystemZ::LOCHI;
case SystemZ::LGHI: return SystemZ::LOCGHI;
default: return 0;
void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, unsigned DstReg,
ArrayRef<MachineOperand> Pred,
unsigned TrueReg,
unsigned FalseReg) const {
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
assert(Pred.size() == 2 && "Invalid condition");
unsigned CCValid = Pred[0].getImm();
unsigned CCMask = Pred[1].getImm();
unsigned Opc;
if (SystemZ::GRX32BitRegClass.hasSubClassEq(RC)) {
if (STI.hasLoadStoreOnCond2())
Opc = SystemZ::LOCRMux;
else {
Opc = SystemZ::LOCR;
MRI.constrainRegClass(DstReg, &SystemZ::GR32BitRegClass);
}
} else if (SystemZ::GR64BitRegClass.hasSubClassEq(RC))
Opc = SystemZ::LOCGR;
else
llvm_unreachable("Invalid register class");
BuildMI(MBB, I, DL, get(Opc), DstReg)
.addReg(FalseReg).addReg(TrueReg)
.addImm(CCValid).addImm(CCMask);
}
bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
unsigned Reg,
MachineRegisterInfo *MRI) const {
unsigned DefOpc = DefMI.getOpcode();
if (DefOpc != SystemZ::LHIMux && DefOpc != SystemZ::LHI &&
DefOpc != SystemZ::LGHI)
return false;
if (DefMI.getOperand(0).getReg() != Reg)
return false;
int32_t ImmVal = (int32_t)DefMI.getOperand(1).getImm();
unsigned UseOpc = UseMI.getOpcode();
unsigned NewUseOpc;
unsigned UseIdx;
int CommuteIdx = -1;
switch (UseOpc) {
case SystemZ::LOCRMux:
if (!STI.hasLoadStoreOnCond2())
return false;
NewUseOpc = SystemZ::LOCHIMux;
if (UseMI.getOperand(2).getReg() == Reg)
UseIdx = 2;
else if (UseMI.getOperand(1).getReg() == Reg)
UseIdx = 2, CommuteIdx = 1;
else
return false;
break;
case SystemZ::LOCGR:
if (!STI.hasLoadStoreOnCond2())
return false;
NewUseOpc = SystemZ::LOCGHI;
if (UseMI.getOperand(2).getReg() == Reg)
UseIdx = 2;
else if (UseMI.getOperand(1).getReg() == Reg)
UseIdx = 2, CommuteIdx = 1;
else
return false;
break;
default:
return false;
}
if (CommuteIdx != -1)
if (!commuteInstruction(UseMI, false, CommuteIdx, UseIdx))
return false;
bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
UseMI.setDesc(get(NewUseOpc));
UseMI.getOperand(UseIdx).ChangeToImmediate(ImmVal);
if (DeleteDef)
DefMI.eraseFromParent();
return true;
}
bool SystemZInstrInfo::isPredicable(MachineInstr &MI) const {
unsigned Opcode = MI.getOpcode();
if (STI.hasLoadStoreOnCond() && getConditionalMove(Opcode))
return true;
if (STI.hasLoadStoreOnCond2() && getConditionalLoadImmediate(Opcode))
return true;
if (Opcode == SystemZ::Return ||
Opcode == SystemZ::Trap ||
Opcode == SystemZ::CallJG ||
@ -600,26 +759,6 @@ bool SystemZInstrInfo::PredicateInstruction(
unsigned CCMask = Pred[1].getImm();
assert(CCMask > 0 && CCMask < 15 && "Invalid predicate");
unsigned Opcode = MI.getOpcode();
if (STI.hasLoadStoreOnCond()) {
if (unsigned CondOpcode = getConditionalMove(Opcode)) {
MI.setDesc(get(CondOpcode));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
.addImm(CCValid)
.addImm(CCMask)
.addReg(SystemZ::CC, RegState::Implicit);
return true;
}
}
if (STI.hasLoadStoreOnCond2()) {
if (unsigned CondOpcode = getConditionalLoadImmediate(Opcode)) {
MI.setDesc(get(CondOpcode));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
.addImm(CCValid)
.addImm(CCMask)
.addReg(SystemZ::CC, RegState::Implicit);
return true;
}
}
if (Opcode == SystemZ::Trap) {
MI.setDesc(get(SystemZ::CondTrap));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
@ -1090,6 +1229,18 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
expandRXYPseudo(MI, SystemZ::L, SystemZ::LFH);
return true;
case SystemZ::LOCMux:
expandLOCPseudo(MI, SystemZ::LOC, SystemZ::LOCFH);
return true;
case SystemZ::LOCHIMux:
expandLOCPseudo(MI, SystemZ::LOCHI, SystemZ::LOCHHI);
return true;
case SystemZ::LOCRMux:
expandLOCRPseudo(MI, SystemZ::LOCR, SystemZ::LOCFHR);
return true;
case SystemZ::STCMux:
expandRXYPseudo(MI, SystemZ::STC, SystemZ::STCH);
return true;
@ -1102,6 +1253,10 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
expandRXYPseudo(MI, SystemZ::ST, SystemZ::STFH);
return true;
case SystemZ::STOCMux:
expandLOCPseudo(MI, SystemZ::STOC, SystemZ::STOCFH);
return true;
case SystemZ::LHIMux:
expandRIPseudo(MI, SystemZ::LHI, SystemZ::IIHF, true);
return true;


@ -142,6 +142,10 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
unsigned LowOpcodeK, unsigned HighOpcode) const;
void expandRXYPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned HighOpcode) const;
void expandLOCPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned HighOpcode) const;
void expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned HighOpcode) const;
void expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned Size) const;
void expandLoadStackGuard(MachineInstr *MI) const;
@ -149,7 +153,23 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
unsigned LowLowOpcode, unsigned Size, bool KillSrc) const;
virtual void anchor();
protected:
/// Commutes the operands in the given instruction by changing the operands
/// order and/or changing the instruction's opcode and/or the immediate value
/// operand.
///
/// The arguments 'CommuteOpIdx1' and 'CommuteOpIdx2' specify the operands
/// to be commuted.
///
/// Do not call this method for a non-commutable instruction or
/// non-commutable operands.
/// Even though the instruction is commutable, the method may still
/// fail to commute the operands; a null pointer is returned in such cases.
MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
unsigned CommuteOpIdx1,
unsigned CommuteOpIdx2) const override;
public:
explicit SystemZInstrInfo(SystemZSubtarget &STI);
@ -175,6 +195,14 @@ public:
bool optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
unsigned SrcReg2, int Mask, int Value,
const MachineRegisterInfo *MRI) const override;
bool canInsertSelect(const MachineBasicBlock&, ArrayRef<MachineOperand> Cond,
unsigned, unsigned, int&, int&, int&) const override;
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const DebugLoc &DL, unsigned DstReg,
ArrayRef<MachineOperand> Cond, unsigned TrueReg,
unsigned FalseReg) const override;
bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg,
MachineRegisterInfo *MRI) const override;
bool isPredicable(MachineInstr &MI) const override;
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
unsigned ExtraPredCycles,


@ -305,14 +305,17 @@ def Select32Mux : SelectWrapper<GRX32>, Requires<[FeatureHighWord]>;
def Select32 : SelectWrapper<GR32>;
def Select64 : SelectWrapper<GR64>;
// We don't define 32-bit Mux stores because the low-only STOC should
// always be used if possible.
// We don't define 32-bit Mux stores if we don't have STOCFH, because the
// low-only STOC should then always be used if possible.
defm CondStore8Mux : CondStores<GRX32, nonvolatile_truncstorei8,
nonvolatile_anyextloadi8, bdxaddr20only>,
Requires<[FeatureHighWord]>;
defm CondStore16Mux : CondStores<GRX32, nonvolatile_truncstorei16,
nonvolatile_anyextloadi16, bdxaddr20only>,
Requires<[FeatureHighWord]>;
defm CondStore32Mux : CondStores<GRX32, nonvolatile_store,
nonvolatile_load, bdxaddr20only>,
Requires<[FeatureLoadStoreOnCond2]>;
defm CondStore8 : CondStores<GR32, nonvolatile_truncstorei8,
nonvolatile_anyextloadi8, bdxaddr20only>;
defm CondStore16 : CondStores<GR32, nonvolatile_truncstorei16,
@ -446,24 +449,57 @@ let mayLoad = 1, mayStore = 1, Defs = [CC] in
//===----------------------------------------------------------------------===//
let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in {
// Load immediate on condition. Created by if-conversion.
defm LOCHI : CondUnaryRIEPair<"lochi", 0xEC42, GR32, imm32sx16>;
defm LOCGHI : CondUnaryRIEPair<"locghi", 0xEC46, GR64, imm64sx16>;
// Load immediate on condition. Matched via DAG pattern and created
// by the PeepholeOptimizer via FoldImmediate.
let hasSideEffects = 0 in {
// Expands to LOCHI or LOCHHI, depending on the choice of register.
def LOCHIMux : CondBinaryRIEPseudo<GRX32, imm32sx16>;
defm LOCHHI : CondBinaryRIEPair<"lochhi", 0xEC4E, GRH32, imm32sx16>;
defm LOCHI : CondBinaryRIEPair<"lochi", 0xEC42, GR32, imm32sx16>;
defm LOCGHI : CondBinaryRIEPair<"locghi", 0xEC46, GR64, imm64sx16>;
}
// Move register on condition. Expanded from Select* pseudos and
// created by early if-conversion.
let hasSideEffects = 0, isCommutable = 1 in {
// Expands to LOCR or LOCFHR or a branch-and-move sequence,
// depending on the choice of registers.
def LOCRMux : CondBinaryRRFPseudo<GRX32, GRX32>;
defm LOCFHR : CondBinaryRRFPair<"locfhr", 0xB9E0, GRH32, GRH32>;
}
// Load on condition. Matched via DAG pattern.
// Expands to LOC or LOCFH, depending on the choice of register.
def LOCMux : CondUnaryRSYPseudo<nonvolatile_load, GRX32, 4>;
defm LOCFH : CondUnaryRSYPair<"locfh", 0xEBE0, nonvolatile_load, GRH32, 4>;
// Store on condition. Expanded from CondStore* pseudos.
// Expands to STOC or STOCFH, depending on the choice of register.
def STOCMux : CondStoreRSYPseudo<GRX32, 4>;
defm STOCFH : CondStoreRSYPair<"stocfh", 0xEBE1, GRH32, 4>;
// Define AsmParser extended mnemonics for each general condition-code mask.
foreach V = [ "E", "NE", "H", "NH", "L", "NL", "HE", "NHE", "LE", "NLE",
"Z", "NZ", "P", "NP", "M", "NM", "LH", "NLH", "O", "NO" ] in {
def LOCHIAsm#V : FixedCondUnaryRIE<CV<V>, "lochi", 0xEC42, GR32,
imm32sx16>;
def LOCGHIAsm#V : FixedCondUnaryRIE<CV<V>, "locghi", 0xEC46, GR64,
imm64sx16>;
def LOCHIAsm#V : FixedCondBinaryRIE<CV<V>, "lochi", 0xEC42, GR32,
imm32sx16>;
def LOCGHIAsm#V : FixedCondBinaryRIE<CV<V>, "locghi", 0xEC46, GR64,
imm64sx16>;
def LOCHHIAsm#V : FixedCondBinaryRIE<CV<V>, "lochhi", 0xEC4E, GRH32,
imm32sx16>;
def LOCFHRAsm#V : FixedCondBinaryRRF<CV<V>, "locfhr", 0xB9E0, GRH32, GRH32>;
def LOCFHAsm#V : FixedCondUnaryRSY<CV<V>, "locfh", 0xEBE0, GRH32, 4>;
def STOCFHAsm#V : FixedCondStoreRSY<CV<V>, "stocfh", 0xEBE1, GRH32, 4>;
}
}
let Predicates = [FeatureLoadStoreOnCond], Uses = [CC] in {
// Move register on condition. Created by if-conversion.
defm LOCR : CondUnaryRRFPair<"locr", 0xB9F2, GR32, GR32>;
defm LOCGR : CondUnaryRRFPair<"locgr", 0xB9E2, GR64, GR64>;
// Move register on condition. Expanded from Select* pseudos and
// created by early if-conversion.
let hasSideEffects = 0, isCommutable = 1 in {
defm LOCR : CondBinaryRRFPair<"locr", 0xB9F2, GR32, GR32>;
defm LOCGR : CondBinaryRRFPair<"locgr", 0xB9E2, GR64, GR64>;
}
// Load on condition. Matched via DAG pattern.
defm LOC : CondUnaryRSYPair<"loc", 0xEBF2, nonvolatile_load, GR32, 4>;
@ -476,8 +512,8 @@ let Predicates = [FeatureLoadStoreOnCond], Uses = [CC] in {
// Define AsmParser extended mnemonics for each general condition-code mask.
foreach V = [ "E", "NE", "H", "NH", "L", "NL", "HE", "NHE", "LE", "NLE",
"Z", "NZ", "P", "NP", "M", "NM", "LH", "NLH", "O", "NO" ] in {
def LOCRAsm#V : FixedCondUnaryRRF<CV<V>, "locr", 0xB9F2, GR32, GR32>;
def LOCGRAsm#V : FixedCondUnaryRRF<CV<V>, "locgr", 0xB9E2, GR64, GR64>;
def LOCRAsm#V : FixedCondBinaryRRF<CV<V>, "locr", 0xB9F2, GR32, GR32>;
def LOCGRAsm#V : FixedCondBinaryRRF<CV<V>, "locgr", 0xB9E2, GR64, GR64>;
def LOCAsm#V : FixedCondUnaryRSY<CV<V>, "loc", 0xEBF2, GR32, 4>;
def LOCGAsm#V : FixedCondUnaryRSY<CV<V>, "locg", 0xEBE2, GR64, 8>;
def STOCAsm#V : FixedCondStoreRSY<CV<V>, "stoc", 0xEBF3, GR32, 4>;
@ -1108,17 +1144,19 @@ def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load, 8>;
// Division and remainder
//===----------------------------------------------------------------------===//
// Division and remainder, from registers.
def DSGFR : BinaryRRE<"dsgfr", 0xB91D, z_sdivrem32, GR128, GR32>;
def DSGR : BinaryRRE<"dsgr", 0xB90D, z_sdivrem64, GR128, GR64>;
def DLR : BinaryRRE<"dlr", 0xB997, z_udivrem32, GR128, GR32>;
def DLGR : BinaryRRE<"dlgr", 0xB987, z_udivrem64, GR128, GR64>;
let hasSideEffects = 1 in { // Do not speculatively execute.
// Division and remainder, from registers.
def DSGFR : BinaryRRE<"dsgfr", 0xB91D, z_sdivrem32, GR128, GR32>;
def DSGR : BinaryRRE<"dsgr", 0xB90D, z_sdivrem64, GR128, GR64>;
def DLR : BinaryRRE<"dlr", 0xB997, z_udivrem32, GR128, GR32>;
def DLGR : BinaryRRE<"dlgr", 0xB987, z_udivrem64, GR128, GR64>;
// Division and remainder, from memory.
def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem32, GR128, load, 4>;
def DSG : BinaryRXY<"dsg", 0xE30D, z_sdivrem64, GR128, load, 8>;
def DL : BinaryRXY<"dl", 0xE397, z_udivrem32, GR128, load, 4>;
def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load, 8>;
// Division and remainder, from memory.
def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem32, GR128, load, 4>;
def DSG : BinaryRXY<"dsg", 0xE30D, z_sdivrem64, GR128, load, 8>;
def DL : BinaryRXY<"dl", 0xE397, z_udivrem32, GR128, load, 4>;
def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load, 8>;
}
//===----------------------------------------------------------------------===//
// Shifts


@ -23,7 +23,7 @@ def Z13Model : SchedMachineModel {
let PostRAScheduler = 1;
// Extra cycles for a mispredicted branch.
let MispredictPenalty = 8;
let MispredictPenalty = 20;
}
let SchedModel = Z13Model in {
@ -161,6 +161,7 @@ def : InstRW<[FXa], (instregex "Select(32|64|32Mux)$")>;
def : InstRW<[FXa], (instregex "CondStore16(Inv)?$")>;
def : InstRW<[FXa], (instregex "CondStore16Mux(Inv)?$")>;
def : InstRW<[FXa], (instregex "CondStore32(Inv)?$")>;
def : InstRW<[FXa], (instregex "CondStore32Mux(Inv)?$")>;
def : InstRW<[FXa], (instregex "CondStore64(Inv)?$")>;
def : InstRW<[FXa], (instregex "CondStore8(Inv)?$")>;
def : InstRW<[FXa], (instregex "CondStore8Mux(Inv)?$")>;
@ -214,10 +215,11 @@ def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVST$")>;
// Conditional move instructions
//===----------------------------------------------------------------------===//
def : InstRW<[FXa, Lat2], (instregex "LOC(G)?R(Asm.*)?$")>;
def : InstRW<[FXa, Lat2], (instregex "LOC(G)?HI(Asm.*)?$")>;
def : InstRW<[FXa, LSU, Lat6], (instregex "LOC(G)?(Asm.*)?$")>;
def : InstRW<[FXb, LSU, Lat5], (instregex "STOC(G)?(Asm.*)?$")>;
def : InstRW<[FXa, Lat2], (instregex "LOCRMux$")>;
def : InstRW<[FXa, Lat2], (instregex "LOC(G|FH)?R(Asm.*)?$")>;
def : InstRW<[FXa, Lat2], (instregex "LOC(G|H)?HI(Asm.*)?$")>;
def : InstRW<[FXa, LSU, Lat6], (instregex "LOC(G|FH|Mux)?(Asm.*)?$")>;
def : InstRW<[FXb, LSU, Lat5], (instregex "STOC(G|FH|Mux)?(Asm.*)?$")>;
//===----------------------------------------------------------------------===//
// Sign extensions


@ -23,7 +23,7 @@ def Z196Model : SchedMachineModel {
let PostRAScheduler = 1;
// Extra cycles for a mispredicted branch.
let MispredictPenalty = 8;
let MispredictPenalty = 16;
}
let SchedModel = Z196Model in {
@ -187,7 +187,6 @@ def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVST$")>;
//===----------------------------------------------------------------------===//
def : InstRW<[FXU, Lat2, EndGroup], (instregex "LOC(G)?R(Asm.*)?$")>;
def : InstRW<[FXU, Lat2, EndGroup], (instregex "LOC(G)?HI(Asm.*)?$")>;
def : InstRW<[FXU, LSU, Lat6, EndGroup], (instregex "LOC(G)?(Asm.*)?$")>;
def : InstRW<[FXU, LSU, Lat5, EndGroup], (instregex "STOC(G)?(Asm.*)?$")>;


@ -23,7 +23,7 @@ def ZEC12Model : SchedMachineModel {
let PostRAScheduler = 1;
// Extra cycles for a mispredicted branch.
let MispredictPenalty = 8;
let MispredictPenalty = 16;
}
let SchedModel = ZEC12Model in {
@ -189,7 +189,6 @@ def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVST$")>;
//===----------------------------------------------------------------------===//
def : InstRW<[FXU, Lat2], (instregex "LOC(G)?R(Asm.*)?$")>;
def : InstRW<[FXU, Lat2], (instregex "LOC(G)?HI(Asm.*)?$")>;
def : InstRW<[FXU, LSU, Lat6], (instregex "LOC(G)?(Asm.*)?$")>;
def : InstRW<[FXU, LSU, Lat5], (instregex "STOC(G)?(Asm.*)?$")>;


@ -78,6 +78,9 @@ public:
// This is important for reducing register pressure in vector code.
bool useAA() const override { return true; }
// Always enable the early if-conversion pass.
bool enableEarlyIfConversion() const override { return true; }
// Automatically generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);


@ -122,6 +122,7 @@ public:
void addIRPasses() override;
bool addInstSelector() override;
bool addILPOpts() override;
void addPreSched2() override;
void addPreEmitPass() override;
};
@ -143,7 +144,14 @@ bool SystemZPassConfig::addInstSelector() {
return false;
}
bool SystemZPassConfig::addILPOpts() {
addPass(&EarlyIfConverterID);
return true;
}
void SystemZPassConfig::addPreSched2() {
addPass(createSystemZExpandPseudoPass(getSystemZTargetMachine()));
if (getOptLevel() != CodeGenOpt::None)
addPass(&IfConverterID);
}


@ -1,23 +0,0 @@
; Test LOCHI/LOCGHI
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
; CHECK-LABEL: bar1:
; CHECK: lhi [[REG:%r[0-5]]], 42
; CHECK: chi %r2, 0
; CHECK: lochie [[REG]], 0
define signext i32 @bar1(i32 signext %x) {
%cmp = icmp ne i32 %x, 0
%.x = select i1 %cmp, i32 42, i32 0
ret i32 %.x
}
; CHECK-LABEL: bar2:
; CHECK: ltgr [[REG:%r[0-5]]], %r2
; CHECK: lghi %r2, 42
; CHECK: locghie %r2, 0
define signext i64 @bar2(i64 signext %x) {
%cmp = icmp ne i64 %x, 0
%.x = select i1 %cmp, i64 42, i64 0
ret i64 %.x
}


@ -2,6 +2,10 @@
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
; Run the test again to make sure it still works the same even
; in the presence of the load-store-on-condition-2 facility.
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare i32 @foo(i32 *)
; Test the simple case.


@ -0,0 +1,159 @@
; Test LOCFH. See comments in asm-18.ll about testing high-word operations.
;
; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=z13 \
; RUN: -no-integrated-as | FileCheck %s
declare void @foo(i32 *)
; Test the simple case.
define void @f1(i32 *%ptr, i32 %limit) {
; CHECK-LABEL: f1:
; CHECK-DAG: stepa [[REG:%r[0-5]]]
; CHECK-DAG: clfi %r3, 42
; CHECK: locfhhe [[REG]], 0(%r2)
; CHECK: br %r14
%easy = call i32 asm "stepa $0", "=h"()
%cond = icmp ult i32 %limit, 42
%other = load i32, i32 *%ptr
%res = select i1 %cond, i32 %easy, i32 %other
call void asm sideeffect "stepb $0", "h"(i32 %res)
ret void
}
; ...and again with the operands swapped.
define void @f2(i32 *%ptr, i32 %limit) {
; CHECK-LABEL: f2:
; CHECK-DAG: stepa [[REG:%r[0-5]]]
; CHECK-DAG: clfi %r3, 42
; CHECK: locfhl [[REG]], 0(%r2)
; CHECK: br %r14
%easy = call i32 asm "stepa $0", "=h"()
%cond = icmp ult i32 %limit, 42
%other = load i32, i32 *%ptr
%res = select i1 %cond, i32 %other, i32 %easy
call void asm sideeffect "stepb $0", "h"(i32 %res)
ret void
}
; Check the high end of the aligned LOC range.
define void @f3(i32 *%base, i32 %limit) {
; CHECK-LABEL: f3:
; CHECK-DAG: stepa [[REG:%r[0-5]]]
; CHECK-DAG: clfi %r3, 42
; CHECK: locfhhe [[REG]], 524284(%r2)
; CHECK: br %r14
%easy = call i32 asm "stepa $0", "=h"()
%ptr = getelementptr i32, i32 *%base, i64 131071
%cond = icmp ult i32 %limit, 42
%other = load i32, i32 *%ptr
%res = select i1 %cond, i32 %easy, i32 %other
call void asm sideeffect "stepb $0", "h"(i32 %res)
ret void
}
; Check the next word up. Other sequences besides this one would be OK.
define void @f4(i32 *%base, i32 %limit) {
; CHECK-LABEL: f4:
; CHECK-DAG: stepa [[REG:%r[0-5]]]
; CHECK-DAG: agfi %r2, 524288
; CHECK-DAG: clfi %r3, 42
; CHECK: locfhhe [[REG]], 0(%r2)
; CHECK: br %r14
%easy = call i32 asm "stepa $0", "=h"()
%ptr = getelementptr i32, i32 *%base, i64 131072
%cond = icmp ult i32 %limit, 42
%other = load i32, i32 *%ptr
%res = select i1 %cond, i32 %easy, i32 %other
call void asm sideeffect "stepb $0", "h"(i32 %res)
ret void
}
; Check the low end of the LOC range.
define void @f5(i32 *%base, i32 %limit) {
; CHECK-LABEL: f5:
; CHECK-DAG: stepa [[REG:%r[0-5]]]
; CHECK-DAG: clfi %r3, 42
; CHECK: locfhhe [[REG]], -524288(%r2)
; CHECK: br %r14
%easy = call i32 asm "stepa $0", "=h"()
%ptr = getelementptr i32, i32 *%base, i64 -131072
%cond = icmp ult i32 %limit, 42
%other = load i32, i32 *%ptr
%res = select i1 %cond, i32 %easy, i32 %other
call void asm sideeffect "stepb $0", "h"(i32 %res)
ret void
}
; Check the next word down, with the same comments as f4.
define void @f6(i32 *%base, i32 %limit) {
; CHECK-LABEL: f6:
; CHECK-DAG: stepa [[REG:%r[0-5]]]
; CHECK-DAG: agfi %r2, -524292
; CHECK-DAG: clfi %r3, 42
; CHECK: locfhhe [[REG]], 0(%r2)
; CHECK: br %r14
%easy = call i32 asm "stepa $0", "=h"()
%ptr = getelementptr i32, i32 *%base, i64 -131073
%cond = icmp ult i32 %limit, 42
%other = load i32, i32 *%ptr
%res = select i1 %cond, i32 %easy, i32 %other
call void asm sideeffect "stepb $0", "h"(i32 %res)
ret void
}
; Try a frame index base.
define void @f7(i32 %alt, i32 %limit) {
; CHECK-LABEL: f7:
; CHECK: brasl %r14, foo@PLT
; CHECK: stepa [[REG:%r[0-5]]]
; CHECK: locfhhe [[REG]], {{[0-9]+}}(%r15)
; CHECK: br %r14
%ptr = alloca i32
call void @foo(i32 *%ptr)
%easy = call i32 asm "stepa $0", "=h"()
%cond = icmp ult i32 %limit, 42
%other = load i32, i32 *%ptr
%res = select i1 %cond, i32 %easy, i32 %other
call void asm sideeffect "stepb $0", "h"(i32 %res)
ret void
}
; Try a case when an index is involved.
define void @f8(i32 %limit, i64 %base, i64 %index) {
; CHECK-LABEL: f8:
; CHECK-DAG: stepa [[REG:%r[0-5]]]
; CHECK-DAG: clfi %r2, 42
; CHECK: locfhhe [[REG]], 0({{%r[1-5]}})
; CHECK: br %r14
%easy = call i32 asm "stepa $0", "=h"()
%add = add i64 %base, %index
%ptr = inttoptr i64 %add to i32 *
%cond = icmp ult i32 %limit, 42
%other = load i32, i32 *%ptr
%res = select i1 %cond, i32 %easy, i32 %other
call void asm sideeffect "stepb $0", "h"(i32 %res)
ret void
}
; Test that conditionally-executed loads do not use LOC, since it is allowed
; to trap even when the condition is false.
define void @f9(i32 %limit, i32 *%ptr) {
; CHECK-LABEL: f9:
; CHECK-NOT: loc
; CHECK: lfh
; CHECK: br %r14
entry:
%easy = call i32 asm "stepa $0", "=h"()
%cmp = icmp ule i32 %easy, %limit
br i1 %cmp, label %load, label %exit
load:
%other = load i32, i32 *%ptr
br label %exit
exit:
%res = phi i32 [ %easy, %entry ], [ %other, %load ]
call void asm sideeffect "stepb $0", "h"(i32 %res)
ret void
}


@ -1,6 +1,10 @@
; Test LOCR and LOCGR.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 -verify-machineinstrs | FileCheck %s
;
; Run the test again to make sure it still works the same even
; in the presence of the load-store-on-condition-2 facility.
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs | FileCheck %s
; Test LOCR.
define i32 @f1(i32 %a, i32 %b, i32 %limit) {
@ -46,3 +50,76 @@ define i64 @f4(i64 %a, i64 %b, i64 %limit) {
%res = select i1 %cond, i64 %a, i64 %b
ret i64 %res
}
; Check that we also get LOCR as a result of early if-conversion.
define i32 @f5(i32 %a, i32 %b, i32 %limit) {
; CHECK-LABEL: f5:
; CHECK: clfi %r4, 41
; CHECK: locrh %r2, %r3
; CHECK: br %r14
entry:
%cond = icmp ult i32 %limit, 42
br i1 %cond, label %if.then, label %return
if.then:
br label %return
return:
%res = phi i32 [ %a, %if.then ], [ %b, %entry ]
ret i32 %res
}
; ... and likewise for LOCGR.
define i64 @f6(i64 %a, i64 %b, i64 %limit) {
; CHECK-LABEL: f6:
; CHECK: clgfi %r4, 41
; CHECK: locgrh %r2, %r3
; CHECK: br %r14
entry:
%cond = icmp ult i64 %limit, 42
br i1 %cond, label %if.then, label %return
if.then:
br label %return
return:
%res = phi i64 [ %a, %if.then ], [ %b, %entry ]
ret i64 %res
}
; Check that inverting the condition works as well.
define i32 @f7(i32 %a, i32 %b, i32 %limit) {
; CHECK-LABEL: f7:
; CHECK: clfi %r4, 41
; CHECK: locrle %r2, %r3
; CHECK: br %r14
entry:
%cond = icmp ult i32 %limit, 42
br i1 %cond, label %if.then, label %return
if.then:
br label %return
return:
%res = phi i32 [ %b, %if.then ], [ %a, %entry ]
ret i32 %res
}
; ... and likewise for LOCGR.
define i64 @f8(i64 %a, i64 %b, i64 %limit) {
; CHECK-LABEL: f8:
; CHECK: clgfi %r4, 41
; CHECK: locgrle %r2, %r3
; CHECK: br %r14
entry:
%cond = icmp ult i64 %limit, 42
br i1 %cond, label %if.then, label %return
if.then:
br label %return
return:
%res = phi i64 [ %b, %if.then ], [ %a, %entry ]
ret i64 %res
}


@ -0,0 +1,138 @@
; Test LOCHI and LOCGHI.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs | FileCheck %s
define i32 @f1(i32 %x) {
; CHECK-LABEL: f1:
; CHECK: lhi [[REG:%r[0-5]]], 0
; CHECK: chi %r2, 0
; CHECK: lochilh [[REG]], 42
; CHECK: br %r14
%cond = icmp ne i32 %x, 0
%res = select i1 %cond, i32 42, i32 0
ret i32 %res
}
define i32 @f2(i32 %x, i32 %y) {
; CHECK-LABEL: f2:
; CHECK: chi %r2, 0
; CHECK: lochilh %r3, 42
; CHECK: br %r14
%cond = icmp ne i32 %x, 0
%res = select i1 %cond, i32 42, i32 %y
ret i32 %res
}
define i32 @f3(i32 %x, i32 %y) {
; CHECK-LABEL: f3:
; CHECK: chi %r2, 0
; CHECK: lochie %r3, 42
; CHECK: br %r14
%cond = icmp ne i32 %x, 0
%res = select i1 %cond, i32 %y, i32 42
ret i32 %res
}
define i64 @f4(i64 %x) {
; CHECK-LABEL: f4:
; CHECK: lghi [[REG:%r[0-5]]], 0
; CHECK: cghi %r2, 0
; CHECK: locghilh [[REG]], 42
; CHECK: br %r14
%cond = icmp ne i64 %x, 0
%res = select i1 %cond, i64 42, i64 0
ret i64 %res
}
define i64 @f5(i64 %x, i64 %y) {
; CHECK-LABEL: f5:
; CHECK: cghi %r2, 0
; CHECK: locghilh %r3, 42
; CHECK: br %r14
%cond = icmp ne i64 %x, 0
%res = select i1 %cond, i64 42, i64 %y
ret i64 %res
}
define i64 @f6(i64 %x, i64 %y) {
; CHECK-LABEL: f6:
; CHECK: cghi %r2, 0
; CHECK: locghie %r3, 42
; CHECK: br %r14
%cond = icmp ne i64 %x, 0
%res = select i1 %cond, i64 %y, i64 42
ret i64 %res
}
; Check that we also get LOCHI as a result of early if-conversion.
define i32 @f7(i32 %x, i32 %y) {
; CHECK-LABEL: f7:
; CHECK: chi %r2, 0
; CHECK: lochie %r3, 42
; CHECK: br %r14
entry:
%cond = icmp ne i32 %x, 0
br i1 %cond, label %if.then, label %return
if.then:
br label %return
return:
%res = phi i32 [ %y, %if.then ], [ 42, %entry ]
ret i32 %res
}
; ... and the same for LOCGHI.
define i64 @f8(i64 %x, i64 %y) {
; CHECK-LABEL: f8:
; CHECK: cghi %r2, 0
; CHECK: locghie %r3, 42
; CHECK: br %r14
entry:
%cond = icmp ne i64 %x, 0
br i1 %cond, label %if.then, label %return
if.then:
br label %return
return:
%res = phi i64 [ %y, %if.then ], [ 42, %entry ]
ret i64 %res
}
; Check that inverting the condition works as well.
define i32 @f9(i32 %x, i32 %y) {
; CHECK-LABEL: f9:
; CHECK: chi %r2, 0
; CHECK: lochilh %r3, 42
; CHECK: br %r14
entry:
%cond = icmp ne i32 %x, 0
br i1 %cond, label %if.then, label %return
if.then:
br label %return
return:
%res = phi i32 [ 42, %if.then ], [ %y, %entry ]
ret i32 %res
}
; ... and the same for LOCGHI.
define i64 @f10(i64 %x, i64 %y) {
; CHECK-LABEL: f10:
; CHECK: cghi %r2, 0
; CHECK: locghilh %r3, 42
; CHECK: br %r14
entry:
%cond = icmp ne i64 %x, 0
br i1 %cond, label %if.then, label %return
if.then:
br label %return
return:
%res = phi i64 [ 42, %if.then ], [ %y, %entry ]
ret i64 %res
}


@ -0,0 +1,213 @@
; Test LOCFHR and LOCHHI.
; See comments in asm-18.ll about testing high-word operations.
;
; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=z13 \
; RUN: -no-integrated-as | FileCheck %s
define void @f1(i32 %limit) {
; CHECK-LABEL: f1:
; CHECK-DAG: stepa [[REG1:%r[0-5]]]
; CHECK-DAG: stepb [[REG2:%r[0-5]]]
; CHECK-DAG: clfi %r2, 42
; CHECK: locfhrl [[REG2]], [[REG1]]
; CHECK: stepc [[REG2]]
; CHECK: br %r14
%a = call i32 asm sideeffect "stepa $0", "=h"()
%b = call i32 asm sideeffect "stepb $0", "=h"()
%cond = icmp ult i32 %limit, 42
%res = select i1 %cond, i32 %a, i32 %b
call void asm sideeffect "stepc $0", "h"(i32 %res)
ret void
}
; FIXME: We should commute the LOCRMux to save one move.
define void @f2(i32 %limit) {
; CHECK-LABEL: f2:
; CHECK-DAG: stepa [[REG1:%r[0-5]]]
; CHECK-DAG: stepb [[REG2:%r[0-5]]]
; CHECK-DAG: clijhe %r2, 42,
; CHECK: risblg [[REG2]], [[REG1]], 0, 159, 32
; CHECK: risbhg [[REG1]], [[REG2]], 0, 159, 32
; CHECK: stepc [[REG1]]
; CHECK: br %r14
%dummy = call i32 asm sideeffect "dummy $0", "=h"()
%a = call i32 asm sideeffect "stepa $0", "=h"()
%b = call i32 asm sideeffect "stepb $0", "=r"()
%cond = icmp ult i32 %limit, 42
%res = select i1 %cond, i32 %a, i32 %b
call void asm sideeffect "stepc $0", "h"(i32 %res)
call void asm sideeffect "dummy $0", "h"(i32 %dummy)
ret void
}
define void @f3(i32 %limit) {
; CHECK-LABEL: f3:
; CHECK-DAG: stepa [[REG2:%r[0-5]]]
; CHECK-DAG: stepb [[REG1:%r[0-5]]]
; CHECK-DAG: clijhe %r2, 42,
; CHECK: risbhg [[REG1]], [[REG2]], 0, 159, 32
; CHECK: stepc [[REG1]]
; CHECK: br %r14
%dummy = call i32 asm sideeffect "dummy $0", "=h"()
%a = call i32 asm sideeffect "stepa $0", "=r"()
%b = call i32 asm sideeffect "stepb $0", "=h"()
%cond = icmp ult i32 %limit, 42
%res = select i1 %cond, i32 %a, i32 %b
call void asm sideeffect "stepc $0", "h"(i32 %res)
call void asm sideeffect "dummy $0", "h"(i32 %dummy)
ret void
}
; FIXME: We should commute the LOCRMux to save one move.
define void @f4(i32 %limit) {
; CHECK-LABEL: f4:
; CHECK-DAG: stepa [[REG1:%r[0-5]]]
; CHECK-DAG: stepb [[REG2:%r[0-5]]]
; CHECK-DAG: clijhe %r2, 42,
; CHECK: risbhg [[REG2]], [[REG1]], 0, 159, 32
; CHECK: risblg [[REG1]], [[REG2]], 0, 159, 32
; CHECK: stepc [[REG1]]
; CHECK: br %r14
%dummy = call i32 asm sideeffect "dummy $0", "=h"()
%a = call i32 asm sideeffect "stepa $0", "=r"()
%b = call i32 asm sideeffect "stepb $0", "=h"()
%cond = icmp ult i32 %limit, 42
%res = select i1 %cond, i32 %a, i32 %b
call void asm sideeffect "stepc $0", "r"(i32 %res)
call void asm sideeffect "dummy $0", "h"(i32 %dummy)
ret void
}
define void @f5(i32 %limit) {
; CHECK-LABEL: f5:
; CHECK-DAG: stepa [[REG2:%r[0-5]]]
; CHECK-DAG: stepb [[REG1:%r[0-5]]]
; CHECK-DAG: clijhe %r2, 42,
; CHECK: risblg [[REG1]], [[REG2]], 0, 159, 32
; CHECK: stepc [[REG1]]
; CHECK: br %r14
%dummy = call i32 asm sideeffect "dummy $0", "=h"()
%a = call i32 asm sideeffect "stepa $0", "=h"()
%b = call i32 asm sideeffect "stepb $0", "=r"()
%cond = icmp ult i32 %limit, 42
%res = select i1 %cond, i32 %a, i32 %b
call void asm sideeffect "stepc $0", "r"(i32 %res)
call void asm sideeffect "dummy $0", "h"(i32 %dummy)
ret void
}
; Check that we also get LOCFHR as a result of early if-conversion.
define void @f6(i32 %limit) {
; CHECK-LABEL: f6:
; CHECK-DAG: stepa [[REG1:%r[0-5]]]
; CHECK-DAG: stepb [[REG2:%r[0-5]]]
; CHECK-DAG: clfi %r2, 41
; CHECK: locfhrle [[REG2]], [[REG1]]
; CHECK: stepc [[REG2]]
; CHECK: br %r14
entry:
%a = call i32 asm sideeffect "stepa $0", "=h"()
%b = call i32 asm sideeffect "stepb $0", "=h"()
%cond = icmp ult i32 %limit, 42
br i1 %cond, label %if.then, label %return
if.then:
br label %return
return:
%res = phi i32 [ %a, %if.then ], [ %b, %entry ]
call void asm sideeffect "stepc $0", "h"(i32 %res)
ret void
}
; Check that inverting the condition works as well.
define void @f7(i32 %limit) {
; CHECK-LABEL: f7:
; CHECK-DAG: stepa [[REG1:%r[0-5]]]
; CHECK-DAG: stepb [[REG2:%r[0-5]]]
; CHECK-DAG: clfi %r2, 41
; CHECK: locfhrh [[REG2]], [[REG1]]
; CHECK: stepc [[REG2]]
; CHECK: br %r14
entry:
%a = call i32 asm sideeffect "stepa $0", "=h"()
%b = call i32 asm sideeffect "stepb $0", "=h"()
%cond = icmp ult i32 %limit, 42
br i1 %cond, label %if.then, label %return
if.then:
br label %return
return:
%res = phi i32 [ %b, %if.then ], [ %a, %entry ]
call void asm sideeffect "stepc $0", "h"(i32 %res)
ret void
}
define void @f8(i32 %limit) {
; CHECK-LABEL: f8:
; CHECK: clfi %r2, 42
; CHECK: lochhil [[REG:%r[0-5]]], 32767
; CHECK: stepa [[REG]]
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%res = select i1 %cond, i32 32767, i32 0
call void asm sideeffect "stepa $0", "h"(i32 %res)
ret void
}
define void @f9(i32 %limit) {
; CHECK-LABEL: f9:
; CHECK: clfi %r2, 42
; CHECK: lochhil [[REG:%r[0-5]]], -32768
; CHECK: stepa [[REG]]
; CHECK: br %r14
%cond = icmp ult i32 %limit, 42
%res = select i1 %cond, i32 -32768, i32 0
call void asm sideeffect "stepa $0", "h"(i32 %res)
ret void
}
; Check that we also get LOCHHI as a result of early if-conversion.
define void @f10(i32 %limit) {
; CHECK-LABEL: f10:
; CHECK-DAG: stepa [[REG:%r[0-5]]]
; CHECK-DAG: clfi %r2, 41
; CHECK: lochhile [[REG]], 123
; CHECK: stepb [[REG]]
; CHECK: br %r14
entry:
%a = call i32 asm sideeffect "stepa $0", "=h"()
%cond = icmp ult i32 %limit, 42
br i1 %cond, label %if.then, label %return
if.then:
br label %return
return:
%res = phi i32 [ 123, %if.then ], [ %a, %entry ]
call void asm sideeffect "stepb $0", "h"(i32 %res)
ret void
}
; Check that inverting the condition works as well.
define void @f11(i32 %limit) {
; CHECK-LABEL: f11:
; CHECK-DAG: stepa [[REG:%r[0-5]]]
; CHECK-DAG: clfi %r2, 41
; CHECK: lochhih [[REG]], 123
; CHECK: stepb [[REG]]
; CHECK: br %r14
entry:
%a = call i32 asm sideeffect "stepa $0", "=h"()
%cond = icmp ult i32 %limit, 42
br i1 %cond, label %if.then, label %return
if.then:
br label %return
return:
%res = phi i32 [ %a, %if.then ], [ 123, %entry ]
call void asm sideeffect "stepb $0", "h"(i32 %res)
ret void
}

View File

@ -2,6 +2,10 @@
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
; Run the test again to make sure it still works the same even
; in the presence of the load-store-on-condition-2 facility.
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
declare void @foo(i32 *)
; Test the simple case, with the loaded value first.


@ -0,0 +1,142 @@
; Test STOCFHs that are presented as selects.
; See comments in asm-18.ll about testing high-word operations.
;
; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=z13 \
; RUN: -no-integrated-as | FileCheck %s
declare void @foo(i32 *)
; Test the simple case, with the loaded value first.
define void @f1(i32 *%ptr, i32 %limit) {
; CHECK-LABEL: f1:
; CHECK-DAG: stepa [[REG:%r[0-5]]]
; CHECK-DAG: clfi %r3, 42
; CHECK: stocfhhe [[REG]], 0(%r2)
; CHECK: br %r14
%alt = call i32 asm "stepa $0", "=h"()
%cond = icmp ult i32 %limit, 42
%orig = load i32, i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
ret void
}
; ...and with the loaded value second
define void @f2(i32 *%ptr, i32 %limit) {
; CHECK-LABEL: f2:
; CHECK-DAG: stepa [[REG:%r[0-5]]]
; CHECK-DAG: clfi %r3, 42
; CHECK: stocfhl [[REG]], 0(%r2)
; CHECK: br %r14
%alt = call i32 asm "stepa $0", "=h"()
%cond = icmp ult i32 %limit, 42
%orig = load i32, i32 *%ptr
%res = select i1 %cond, i32 %alt, i32 %orig
store i32 %res, i32 *%ptr
ret void
}
; Check the high end of the aligned STOC range.
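; 131071 * 4 = 524284, the largest word-aligned offset that still fits the
; signed 20-bit displacement field.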
define void @f3(i32 *%base, i32 %limit) {
; CHECK-LABEL: f3:
; CHECK-DAG: stepa [[REG:%r[0-5]]]
; CHECK-DAG: clfi %r3, 42
; CHECK: stocfhhe [[REG]], 524284(%r2)
; CHECK: br %r14
%alt = call i32 asm "stepa $0", "=h"()
%ptr = getelementptr i32, i32 *%base, i64 131071
%cond = icmp ult i32 %limit, 42
%orig = load i32, i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
ret void
}
; Check the next word up. Other sequences besides this one would be OK.
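; 131072 * 4 = 524288 is just out of range, so the base is first adjusted
; with AGFI.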
define void @f4(i32 *%base, i32 %limit) {
; CHECK-LABEL: f4:
; CHECK-DAG: stepa [[REG:%r[0-5]]]
; CHECK-DAG: agfi %r2, 524288
; CHECK-DAG: clfi %r3, 42
; CHECK: stocfhhe [[REG]], 0(%r2)
; CHECK: br %r14
%alt = call i32 asm "stepa $0", "=h"()
%ptr = getelementptr i32, i32 *%base, i64 131072
%cond = icmp ult i32 %limit, 42
%orig = load i32, i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
ret void
}
; Check the low end of the STOC range.
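; -131072 * 4 = -524288, the smallest displacement that still fits the field.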
define void @f5(i32 *%base, i32 %limit) {
; CHECK-LABEL: f5:
; CHECK-DAG: stepa [[REG:%r[0-5]]]
; CHECK-DAG: clfi %r3, 42
; CHECK: stocfhhe [[REG]], -524288(%r2)
; CHECK: br %r14
%alt = call i32 asm "stepa $0", "=h"()
%ptr = getelementptr i32, i32 *%base, i64 -131072
%cond = icmp ult i32 %limit, 42
%orig = load i32, i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
ret void
}
; Check the next word down, with the same comments as f4.
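; -131073 * 4 = -524292 is just below the range, so the base is again
; adjusted with AGFI first.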
define void @f6(i32 *%base, i32 %limit) {
; CHECK-LABEL: f6:
; CHECK-DAG: stepa [[REG:%r[0-5]]]
; CHECK-DAG: agfi %r2, -524292
; CHECK-DAG: clfi %r3, 42
; CHECK: stocfhhe [[REG]], 0(%r2)
; CHECK: br %r14
%alt = call i32 asm "stepa $0", "=h"()
%ptr = getelementptr i32, i32 *%base, i64 -131073
%cond = icmp ult i32 %limit, 42
%orig = load i32, i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
ret void
}
; Try a frame index base.
define void @f7(i32 %limit) {
; CHECK-LABEL: f7:
; CHECK: brasl %r14, foo@PLT
; CHECK: stepa [[REG:%r[0-5]]]
; CHECK: stocfhhe [[REG]], {{[0-9]+}}(%r15)
; CHECK: brasl %r14, foo@PLT
; CHECK: br %r14
%ptr = alloca i32
call void @foo(i32 *%ptr)
%alt = call i32 asm "stepa $0", "=h"()
%cond = icmp ult i32 %limit, 42
%orig = load i32, i32 *%ptr
%res = select i1 %cond, i32 %orig, i32 %alt
store i32 %res, i32 *%ptr
call void @foo(i32 *%ptr)
ret void
}
; Test that conditionally-executed stores do not use STOC, since STOC
; is allowed to trap even when the condition is false.
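; The store stays in its own conditionally-executed block and is done with a
; plain STFH that only runs when the branch is taken.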
define void @f8(i32 %a, i32 %b, i32 *%dest) {
; CHECK-LABEL: f8:
; CHECK-NOT: stoc
; CHECK: stfh
; CHECK: br %r14
entry:
%val = call i32 asm "stepa $0", "=h"()
%cmp = icmp ule i32 %a, %b
br i1 %cmp, label %store, label %exit
store:
store i32 %val, i32 *%dest
br label %exit
exit:
ret void
}

View File

@@ -4414,3 +4414,196 @@
#CHECK: locghi %r11, 32512, 15
0xec 0xbf 0x7f 0x00 0x00 0x46
#CHECK: lochhi %r11, 42, 0
0xec 0xb0 0x00 0x2a 0x00 0x4e
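# LOCHHI is encoded as 0xec | R1,M3 | 16-bit signed I2 | unused | 0x4e; the
# M3 mask in the second byte selects the extended mnemonics checked below.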
#CHECK: lochhio %r11, 42
0xec 0xb1 0x00 0x2a 0x00 0x4e
#CHECK: lochhih %r11, 42
0xec 0xb2 0x00 0x2a 0x00 0x4e
#CHECK: lochhinle %r11, 42
0xec 0xb3 0x00 0x2a 0x00 0x4e
#CHECK: lochhil %r11, -1
0xec 0xb4 0xff 0xff 0x00 0x4e
#CHECK: lochhinhe %r11, 42
0xec 0xb5 0x00 0x2a 0x00 0x4e
#CHECK: lochhilh %r11, -1
0xec 0xb6 0xff 0xff 0x00 0x4e
#CHECK: lochhine %r11, 0
0xec 0xb7 0x00 0x00 0x00 0x4e
#CHECK: lochhie %r11, 0
0xec 0xb8 0x00 0x00 0x00 0x4e
#CHECK: lochhinlh %r11, 42
0xec 0xb9 0x00 0x2a 0x00 0x4e
#CHECK: lochhihe %r11, 255
0xec 0xba 0x00 0xff 0x00 0x4e
#CHECK: lochhinl %r11, 255
0xec 0xbb 0x00 0xff 0x00 0x4e
#CHECK: lochhile %r11, 32767
0xec 0xbc 0x7f 0xff 0x00 0x4e
#CHECK: lochhinh %r11, 32767
0xec 0xbd 0x7f 0xff 0x00 0x4e
#CHECK: lochhino %r11, 32512
0xec 0xbe 0x7f 0x00 0x00 0x4e
#CHECK: lochhi %r11, 32512, 15
0xec 0xbf 0x7f 0x00 0x00 0x4e
# CHECK: locfh %r7, 6399(%r8), 0
0xeb 0x70 0x88 0xff 0x01 0xe0
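# LOCFH is encoded as 0xeb | R1,M3 | B2,DL(high) | DL(low) | DH | 0xe0; here
# the displacement 6399 = 0x018ff is split into DL = 0x8ff and DH = 0x01.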
# CHECK: locfho %r7, 6399(%r8)
0xeb 0x71 0x88 0xff 0x01 0xe0
# CHECK: locfhh %r7, 6399(%r8)
0xeb 0x72 0x88 0xff 0x01 0xe0
# CHECK: locfhnle %r7, 6399(%r8)
0xeb 0x73 0x88 0xff 0x01 0xe0
# CHECK: locfhl %r7, 6399(%r8)
0xeb 0x74 0x88 0xff 0x01 0xe0
# CHECK: locfhnhe %r7, 6399(%r8)
0xeb 0x75 0x88 0xff 0x01 0xe0
# CHECK: locfhlh %r7, 6399(%r8)
0xeb 0x76 0x88 0xff 0x01 0xe0
# CHECK: locfhne %r7, 6399(%r8)
0xeb 0x77 0x88 0xff 0x01 0xe0
# CHECK: locfhe %r7, 6399(%r8)
0xeb 0x78 0x88 0xff 0x01 0xe0
# CHECK: locfhnlh %r7, 6399(%r8)
0xeb 0x79 0x88 0xff 0x01 0xe0
# CHECK: locfhhe %r7, 6399(%r8)
0xeb 0x7a 0x88 0xff 0x01 0xe0
# CHECK: locfhnl %r7, 6399(%r8)
0xeb 0x7b 0x88 0xff 0x01 0xe0
# CHECK: locfhle %r7, 6399(%r8)
0xeb 0x7c 0x88 0xff 0x01 0xe0
# CHECK: locfhnh %r7, 6399(%r8)
0xeb 0x7d 0x88 0xff 0x01 0xe0
# CHECK: locfhno %r7, 6399(%r8)
0xeb 0x7e 0x88 0xff 0x01 0xe0
# CHECK: locfh %r7, 6399(%r8), 15
0xeb 0x7f 0x88 0xff 0x01 0xe0
# CHECK: locfhr %r11, %r3, 0
0xb9 0xe0 0x00 0xb3
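# LOCFHR is encoded as 0xb9 0xe0 | M3,0 | R1,R2; only the mask nibble varies
# in the lines below.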
# CHECK: locfhro %r11, %r3
0xb9 0xe0 0x10 0xb3
# CHECK: locfhrh %r11, %r3
0xb9 0xe0 0x20 0xb3
# CHECK: locfhrnle %r11, %r3
0xb9 0xe0 0x30 0xb3
# CHECK: locfhrl %r11, %r3
0xb9 0xe0 0x40 0xb3
# CHECK: locfhrnhe %r11, %r3
0xb9 0xe0 0x50 0xb3
# CHECK: locfhrlh %r11, %r3
0xb9 0xe0 0x60 0xb3
# CHECK: locfhrne %r11, %r3
0xb9 0xe0 0x70 0xb3
# CHECK: locfhre %r11, %r3
0xb9 0xe0 0x80 0xb3
# CHECK: locfhrnlh %r11, %r3
0xb9 0xe0 0x90 0xb3
# CHECK: locfhrhe %r11, %r3
0xb9 0xe0 0xa0 0xb3
# CHECK: locfhrnl %r11, %r3
0xb9 0xe0 0xb0 0xb3
# CHECK: locfhrle %r11, %r3
0xb9 0xe0 0xc0 0xb3
# CHECK: locfhrnh %r11, %r3
0xb9 0xe0 0xd0 0xb3
# CHECK: locfhrno %r11, %r3
0xb9 0xe0 0xe0 0xb3
# CHECK: locfhr %r11, %r3, 15
0xb9 0xe0 0xf0 0xb3
# CHECK: stocfh %r1, 2(%r3), 0
0xeb 0x10 0x30 0x02 0x00 0xe1
# CHECK: stocfho %r1, 2(%r3)
0xeb 0x11 0x30 0x02 0x00 0xe1
# CHECK: stocfhh %r1, 2(%r3)
0xeb 0x12 0x30 0x02 0x00 0xe1
# CHECK: stocfhnle %r1, 2(%r3)
0xeb 0x13 0x30 0x02 0x00 0xe1
# CHECK: stocfhl %r1, 2(%r3)
0xeb 0x14 0x30 0x02 0x00 0xe1
# CHECK: stocfhnhe %r1, 2(%r3)
0xeb 0x15 0x30 0x02 0x00 0xe1
# CHECK: stocfhlh %r1, 2(%r3)
0xeb 0x16 0x30 0x02 0x00 0xe1
# CHECK: stocfhne %r1, 2(%r3)
0xeb 0x17 0x30 0x02 0x00 0xe1
# CHECK: stocfhe %r1, 2(%r3)
0xeb 0x18 0x30 0x02 0x00 0xe1
# CHECK: stocfhnlh %r1, 2(%r3)
0xeb 0x19 0x30 0x02 0x00 0xe1
# CHECK: stocfhhe %r1, 2(%r3)
0xeb 0x1a 0x30 0x02 0x00 0xe1
# CHECK: stocfhnl %r1, 2(%r3)
0xeb 0x1b 0x30 0x02 0x00 0xe1
# CHECK: stocfhle %r1, 2(%r3)
0xeb 0x1c 0x30 0x02 0x00 0xe1
# CHECK: stocfhnh %r1, 2(%r3)
0xeb 0x1d 0x30 0x02 0x00 0xe1
# CHECK: stocfhno %r1, 2(%r3)
0xeb 0x1e 0x30 0x02 0x00 0xe1
# CHECK: stocfh %r1, 2(%r3), 15
0xeb 0x1f 0x30 0x02 0x00 0xe1

View File

@@ -1960,3 +1960,56 @@
locghie %f0, 0
locghie 0, %r0
#CHECK: error: invalid operand
#CHECK: lochhie %r0, 66000
#CHECK: error: invalid operand
#CHECK: lochhie %f0, 0
#CHECK: error: invalid operand
#CHECK: lochhie 0, %r0
lochhie %r0, 66000
lochhie %f0, 0
lochhie 0, %r0
#CHECK: error: invalid operand
#CHECK: locfh %r0,0,-1
#CHECK: error: invalid operand
#CHECK: locfh %r0,0,16
#CHECK: error: invalid operand
#CHECK: locfh %r0,-524289,1
#CHECK: error: invalid operand
#CHECK: locfh %r0,524288,1
#CHECK: error: invalid use of indexed addressing
#CHECK: locfh %r0,0(%r1,%r2),1
locfh %r0,0,-1
locfh %r0,0,16
locfh %r0,-524289,1
locfh %r0,524288,1
locfh %r0,0(%r1,%r2),1
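# LOCFH takes a 4-bit condition mask (0-15), a signed 20-bit displacement and
# no index register; each case above violates one of these constraints.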
#CHECK: error: invalid operand
#CHECK: locfhr %r0,%r0,-1
#CHECK: error: invalid operand
#CHECK: locfhr %r0,%r0,16
locfhr %r0,%r0,-1
locfhr %r0,%r0,16
#CHECK: error: invalid operand
#CHECK: stocfh %r0,0,-1
#CHECK: error: invalid operand
#CHECK: stocfh %r0,0,16
#CHECK: error: invalid operand
#CHECK: stocfh %r0,-524289,1
#CHECK: error: invalid operand
#CHECK: stocfh %r0,524288,1
#CHECK: error: invalid use of indexed addressing
#CHECK: stocfh %r0,0(%r1,%r2),1
stocfh %r0,0,-1
stocfh %r0,0,16
stocfh %r0,-524289,1
stocfh %r0,524288,1
stocfh %r0,0(%r1,%r2),1

View File

@@ -6892,3 +6892,206 @@
locghinh %r11, 32767
locghino %r11, 32512
locghi %r11, 32512, 15
#CHECK: lochhi %r11, 42, 0 # encoding: [0xec,0xb0,0x00,0x2a,0x00,0x4e]
#CHECK: lochhio %r11, 42 # encoding: [0xec,0xb1,0x00,0x2a,0x00,0x4e]
#CHECK: lochhih %r11, 42 # encoding: [0xec,0xb2,0x00,0x2a,0x00,0x4e]
#CHECK: lochhinle %r11, 42 # encoding: [0xec,0xb3,0x00,0x2a,0x00,0x4e]
#CHECK: lochhil %r11, -1 # encoding: [0xec,0xb4,0xff,0xff,0x00,0x4e]
#CHECK: lochhinhe %r11, 42 # encoding: [0xec,0xb5,0x00,0x2a,0x00,0x4e]
#CHECK: lochhilh %r11, -1 # encoding: [0xec,0xb6,0xff,0xff,0x00,0x4e]
#CHECK: lochhine %r11, 0 # encoding: [0xec,0xb7,0x00,0x00,0x00,0x4e]
#CHECK: lochhie %r11, 0 # encoding: [0xec,0xb8,0x00,0x00,0x00,0x4e]
#CHECK: lochhinlh %r11, 42 # encoding: [0xec,0xb9,0x00,0x2a,0x00,0x4e]
#CHECK: lochhihe %r11, 255 # encoding: [0xec,0xba,0x00,0xff,0x00,0x4e]
#CHECK: lochhinl %r11, 255 # encoding: [0xec,0xbb,0x00,0xff,0x00,0x4e]
#CHECK: lochhile %r11, 32767 # encoding: [0xec,0xbc,0x7f,0xff,0x00,0x4e]
#CHECK: lochhinh %r11, 32767 # encoding: [0xec,0xbd,0x7f,0xff,0x00,0x4e]
#CHECK: lochhino %r11, 32512 # encoding: [0xec,0xbe,0x7f,0x00,0x00,0x4e]
#CHECK: lochhi %r11, 32512, 15 # encoding: [0xec,0xbf,0x7f,0x00,0x00,0x4e]
lochhi %r11, 42, 0
lochhio %r11, 42
lochhih %r11, 42
lochhinle %r11, 42
lochhil %r11, -1
lochhinhe %r11, 42
lochhilh %r11, -1
lochhine %r11, 0
lochhie %r11, 0
lochhinlh %r11, 42
lochhihe %r11, 255
lochhinl %r11, 255
lochhile %r11, 32767
lochhinh %r11, 32767
lochhino %r11, 32512
lochhi %r11, 32512, 15
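# The extended mnemonics above hard-code M3 values 1 through 14; plain lochhi
# with an explicit mask covers 0 and 15.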
#CHECK: locfh %r0, 0, 0 # encoding: [0xeb,0x00,0x00,0x00,0x00,0xe0]
#CHECK: locfh %r0, 0, 15 # encoding: [0xeb,0x0f,0x00,0x00,0x00,0xe0]
#CHECK: locfh %r0, -524288, 0 # encoding: [0xeb,0x00,0x00,0x00,0x80,0xe0]
#CHECK: locfh %r0, 524287, 0 # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0xe0]
#CHECK: locfh %r0, 0(%r1), 0 # encoding: [0xeb,0x00,0x10,0x00,0x00,0xe0]
#CHECK: locfh %r0, 0(%r15), 0 # encoding: [0xeb,0x00,0xf0,0x00,0x00,0xe0]
#CHECK: locfh %r15, 0, 0 # encoding: [0xeb,0xf0,0x00,0x00,0x00,0xe0]
#CHECK: locfh %r1, 4095(%r2), 3 # encoding: [0xeb,0x13,0x2f,0xff,0x00,0xe0]
locfh %r0,0,0
locfh %r0,0,15
locfh %r0,-524288,0
locfh %r0,524287,0
locfh %r0,0(%r1),0
locfh %r0,0(%r15),0
locfh %r15,0,0
locfh %r1,4095(%r2),3
#CHECK: locfho %r1, 2(%r3) # encoding: [0xeb,0x11,0x30,0x02,0x00,0xe0]
#CHECK: locfhh %r1, 2(%r3) # encoding: [0xeb,0x12,0x30,0x02,0x00,0xe0]
#CHECK: locfhp %r1, 2(%r3) # encoding: [0xeb,0x12,0x30,0x02,0x00,0xe0]
#CHECK: locfhnle %r1, 2(%r3) # encoding: [0xeb,0x13,0x30,0x02,0x00,0xe0]
#CHECK: locfhl %r1, 2(%r3) # encoding: [0xeb,0x14,0x30,0x02,0x00,0xe0]
#CHECK: locfhm %r1, 2(%r3) # encoding: [0xeb,0x14,0x30,0x02,0x00,0xe0]
#CHECK: locfhnhe %r1, 2(%r3) # encoding: [0xeb,0x15,0x30,0x02,0x00,0xe0]
#CHECK: locfhlh %r1, 2(%r3) # encoding: [0xeb,0x16,0x30,0x02,0x00,0xe0]
#CHECK: locfhne %r1, 2(%r3) # encoding: [0xeb,0x17,0x30,0x02,0x00,0xe0]
#CHECK: locfhnz %r1, 2(%r3) # encoding: [0xeb,0x17,0x30,0x02,0x00,0xe0]
#CHECK: locfhe %r1, 2(%r3) # encoding: [0xeb,0x18,0x30,0x02,0x00,0xe0]
#CHECK: locfhz %r1, 2(%r3) # encoding: [0xeb,0x18,0x30,0x02,0x00,0xe0]
#CHECK: locfhnlh %r1, 2(%r3) # encoding: [0xeb,0x19,0x30,0x02,0x00,0xe0]
#CHECK: locfhhe %r1, 2(%r3) # encoding: [0xeb,0x1a,0x30,0x02,0x00,0xe0]
#CHECK: locfhnl %r1, 2(%r3) # encoding: [0xeb,0x1b,0x30,0x02,0x00,0xe0]
#CHECK: locfhnm %r1, 2(%r3) # encoding: [0xeb,0x1b,0x30,0x02,0x00,0xe0]
#CHECK: locfhle %r1, 2(%r3) # encoding: [0xeb,0x1c,0x30,0x02,0x00,0xe0]
#CHECK: locfhnh %r1, 2(%r3) # encoding: [0xeb,0x1d,0x30,0x02,0x00,0xe0]
#CHECK: locfhnp %r1, 2(%r3) # encoding: [0xeb,0x1d,0x30,0x02,0x00,0xe0]
#CHECK: locfhno %r1, 2(%r3) # encoding: [0xeb,0x1e,0x30,0x02,0x00,0xe0]
locfho %r1,2(%r3)
locfhh %r1,2(%r3)
locfhp %r1,2(%r3)
locfhnle %r1,2(%r3)
locfhl %r1,2(%r3)
locfhm %r1,2(%r3)
locfhnhe %r1,2(%r3)
locfhlh %r1,2(%r3)
locfhne %r1,2(%r3)
locfhnz %r1,2(%r3)
locfhe %r1,2(%r3)
locfhz %r1,2(%r3)
locfhnlh %r1,2(%r3)
locfhhe %r1,2(%r3)
locfhnl %r1,2(%r3)
locfhnm %r1,2(%r3)
locfhle %r1,2(%r3)
locfhnh %r1,2(%r3)
locfhnp %r1,2(%r3)
locfhno %r1,2(%r3)
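# locfhp, locfhm, locfhz, locfhnz, locfhnm and locfhnp are alternate spellings
# of locfhh, locfhl, locfhe, locfhne, locfhnl and locfhnh, as the identical
# encodings above show.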
#CHECK: locfhr %r1, %r2, 0 # encoding: [0xb9,0xe0,0x00,0x12]
#CHECK: locfhr %r1, %r2, 15 # encoding: [0xb9,0xe0,0xf0,0x12]
locfhr %r1,%r2,0
locfhr %r1,%r2,15
#CHECK: locfhro %r1, %r3 # encoding: [0xb9,0xe0,0x10,0x13]
#CHECK: locfhrh %r1, %r3 # encoding: [0xb9,0xe0,0x20,0x13]
#CHECK: locfhrp %r1, %r3 # encoding: [0xb9,0xe0,0x20,0x13]
#CHECK: locfhrnle %r1, %r3 # encoding: [0xb9,0xe0,0x30,0x13]
#CHECK: locfhrl %r1, %r3 # encoding: [0xb9,0xe0,0x40,0x13]
#CHECK: locfhrm %r1, %r3 # encoding: [0xb9,0xe0,0x40,0x13]
#CHECK: locfhrnhe %r1, %r3 # encoding: [0xb9,0xe0,0x50,0x13]
#CHECK: locfhrlh %r1, %r3 # encoding: [0xb9,0xe0,0x60,0x13]
#CHECK: locfhrne %r1, %r3 # encoding: [0xb9,0xe0,0x70,0x13]
#CHECK: locfhrnz %r1, %r3 # encoding: [0xb9,0xe0,0x70,0x13]
#CHECK: locfhre %r1, %r3 # encoding: [0xb9,0xe0,0x80,0x13]
#CHECK: locfhrz %r1, %r3 # encoding: [0xb9,0xe0,0x80,0x13]
#CHECK: locfhrnlh %r1, %r3 # encoding: [0xb9,0xe0,0x90,0x13]
#CHECK: locfhrhe %r1, %r3 # encoding: [0xb9,0xe0,0xa0,0x13]
#CHECK: locfhrnl %r1, %r3 # encoding: [0xb9,0xe0,0xb0,0x13]
#CHECK: locfhrnm %r1, %r3 # encoding: [0xb9,0xe0,0xb0,0x13]
#CHECK: locfhrle %r1, %r3 # encoding: [0xb9,0xe0,0xc0,0x13]
#CHECK: locfhrnh %r1, %r3 # encoding: [0xb9,0xe0,0xd0,0x13]
#CHECK: locfhrnp %r1, %r3 # encoding: [0xb9,0xe0,0xd0,0x13]
#CHECK: locfhrno %r1, %r3 # encoding: [0xb9,0xe0,0xe0,0x13]
locfhro %r1,%r3
locfhrh %r1,%r3
locfhrp %r1,%r3
locfhrnle %r1,%r3
locfhrl %r1,%r3
locfhrm %r1,%r3
locfhrnhe %r1,%r3
locfhrlh %r1,%r3
locfhrne %r1,%r3
locfhrnz %r1,%r3
locfhre %r1,%r3
locfhrz %r1,%r3
locfhrnlh %r1,%r3
locfhrhe %r1,%r3
locfhrnl %r1,%r3
locfhrnm %r1,%r3
locfhrle %r1,%r3
locfhrnh %r1,%r3
locfhrnp %r1,%r3
locfhrno %r1,%r3
#CHECK: stocfh %r0, 0, 0 # encoding: [0xeb,0x00,0x00,0x00,0x00,0xe1]
#CHECK: stocfh %r0, 0, 15 # encoding: [0xeb,0x0f,0x00,0x00,0x00,0xe1]
#CHECK: stocfh %r0, -524288, 0 # encoding: [0xeb,0x00,0x00,0x00,0x80,0xe1]
#CHECK: stocfh %r0, 524287, 0 # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0xe1]
#CHECK: stocfh %r0, 0(%r1), 0 # encoding: [0xeb,0x00,0x10,0x00,0x00,0xe1]
#CHECK: stocfh %r0, 0(%r15), 0 # encoding: [0xeb,0x00,0xf0,0x00,0x00,0xe1]
#CHECK: stocfh %r15, 0, 0 # encoding: [0xeb,0xf0,0x00,0x00,0x00,0xe1]
#CHECK: stocfh %r1, 4095(%r2), 3 # encoding: [0xeb,0x13,0x2f,0xff,0x00,0xe1]
stocfh %r0,0,0
stocfh %r0,0,15
stocfh %r0,-524288,0
stocfh %r0,524287,0
stocfh %r0,0(%r1),0
stocfh %r0,0(%r15),0
stocfh %r15,0,0
stocfh %r1,4095(%r2),3
#CHECK: stocfho %r1, 2(%r3) # encoding: [0xeb,0x11,0x30,0x02,0x00,0xe1]
#CHECK: stocfhh %r1, 2(%r3) # encoding: [0xeb,0x12,0x30,0x02,0x00,0xe1]
#CHECK: stocfhp %r1, 2(%r3) # encoding: [0xeb,0x12,0x30,0x02,0x00,0xe1]
#CHECK: stocfhnle %r1, 2(%r3) # encoding: [0xeb,0x13,0x30,0x02,0x00,0xe1]
#CHECK: stocfhl %r1, 2(%r3) # encoding: [0xeb,0x14,0x30,0x02,0x00,0xe1]
#CHECK: stocfhm %r1, 2(%r3) # encoding: [0xeb,0x14,0x30,0x02,0x00,0xe1]
#CHECK: stocfhnhe %r1, 2(%r3) # encoding: [0xeb,0x15,0x30,0x02,0x00,0xe1]
#CHECK: stocfhlh %r1, 2(%r3) # encoding: [0xeb,0x16,0x30,0x02,0x00,0xe1]
#CHECK: stocfhne %r1, 2(%r3) # encoding: [0xeb,0x17,0x30,0x02,0x00,0xe1]
#CHECK: stocfhnz %r1, 2(%r3) # encoding: [0xeb,0x17,0x30,0x02,0x00,0xe1]
#CHECK: stocfhe %r1, 2(%r3) # encoding: [0xeb,0x18,0x30,0x02,0x00,0xe1]
#CHECK: stocfhz %r1, 2(%r3) # encoding: [0xeb,0x18,0x30,0x02,0x00,0xe1]
#CHECK: stocfhnlh %r1, 2(%r3) # encoding: [0xeb,0x19,0x30,0x02,0x00,0xe1]
#CHECK: stocfhhe %r1, 2(%r3) # encoding: [0xeb,0x1a,0x30,0x02,0x00,0xe1]
#CHECK: stocfhnl %r1, 2(%r3) # encoding: [0xeb,0x1b,0x30,0x02,0x00,0xe1]
#CHECK: stocfhnm %r1, 2(%r3) # encoding: [0xeb,0x1b,0x30,0x02,0x00,0xe1]
#CHECK: stocfhle %r1, 2(%r3) # encoding: [0xeb,0x1c,0x30,0x02,0x00,0xe1]
#CHECK: stocfhnh %r1, 2(%r3) # encoding: [0xeb,0x1d,0x30,0x02,0x00,0xe1]
#CHECK: stocfhnp %r1, 2(%r3) # encoding: [0xeb,0x1d,0x30,0x02,0x00,0xe1]
#CHECK: stocfhno %r1, 2(%r3) # encoding: [0xeb,0x1e,0x30,0x02,0x00,0xe1]
stocfho %r1,2(%r3)
stocfhh %r1,2(%r3)
stocfhp %r1,2(%r3)
stocfhnle %r1,2(%r3)
stocfhl %r1,2(%r3)
stocfhm %r1,2(%r3)
stocfhnhe %r1,2(%r3)
stocfhlh %r1,2(%r3)
stocfhne %r1,2(%r3)
stocfhnz %r1,2(%r3)
stocfhe %r1,2(%r3)
stocfhz %r1,2(%r3)
stocfhnlh %r1,2(%r3)
stocfhhe %r1,2(%r3)
stocfhnl %r1,2(%r3)
stocfhnm %r1,2(%r3)
stocfhle %r1,2(%r3)
stocfhnh %r1,2(%r3)
stocfhnp %r1,2(%r3)
stocfhno %r1,2(%r3)