hanchenye-llvm-project/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp

455 lines
15 KiB
C++
Raw Normal View History

//===-- SystemZInstrInfo.cpp - SystemZ instruction information ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the SystemZ implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//
#include "SystemZInstrInfo.h"
#include "SystemZInstrBuilder.h"
[SystemZ] Add long branch pass Before this change, the SystemZ backend would use BRCL for all branches and only consider shortening them to BRC when generating an object file. E.g. a branch on equal would use the JGE alias of BRCL in assembly output, but might be shortened to the JE alias of BRC in ELF output. This was a useful first step, but it had two problems: (1) The z assembler isn't traditionally supposed to perform branch shortening or branch relaxation. We followed this rule by not relaxing branches in assembler input, but that meant that generating assembly code and then assembling it would not produce the same result as going directly to object code; the former would give long branches everywhere, whereas the latter would use short branches where possible. (2) Other useful branches, like COMPARE AND BRANCH, do not have long forms. We would need to do something else before supporting them. (Although COMPARE AND BRANCH does not change the condition codes, the plan is to model COMPARE AND BRANCH as a CC-clobbering instruction during codegen, so that we can safely lower it to a separate compare and long branch where necessary. This is not a valid transformation for the assembler proper to make.) This patch therefore moves branch relaxation to a pre-emit pass. For now, calls are still shortened from BRASL to BRAS by the assembler, although this too is not really the traditional behaviour. The first test takes about 1.5s to run, and there are likely to be more tests in this vein once further branch types are added. The feeling on IRC was that 1.5s is a bit much for a single test, so I've restricted it to SystemZ hosts for now. The patch exposes (and fixes) some typos in the main CodeGen/SystemZ tests. A later patch will remove the {{g}}s from that directory. llvm-svn: 182274
2013-05-20 22:23:08 +08:00
#include "llvm/Target/TargetMachine.h"
#define GET_INSTRINFO_CTOR
#define GET_INSTRMAP_INFO
#include "SystemZGenInstrInfo.inc"
using namespace llvm;
SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm)
: SystemZGenInstrInfo(SystemZ::ADJCALLSTACKDOWN, SystemZ::ADJCALLSTACKUP),
RI(tm, *this) {
}
// MI is a 128-bit load or store. Split it into two 64-bit loads or stores,
// each having the opcode given by NewOpcode.
void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI,
unsigned NewOpcode) const {
MachineBasicBlock *MBB = MI->getParent();
MachineFunction &MF = *MBB->getParent();
// Get two load or store instructions. Use the original instruction for one
// of them (arbitarily the second here) and create a clone for the other.
MachineInstr *EarlierMI = MF.CloneMachineInstr(MI);
MBB->insert(MI, EarlierMI);
// Set up the two 64-bit registers.
MachineOperand &HighRegOp = EarlierMI->getOperand(0);
MachineOperand &LowRegOp = MI->getOperand(0);
HighRegOp.setReg(RI.getSubReg(HighRegOp.getReg(), SystemZ::subreg_high));
LowRegOp.setReg(RI.getSubReg(LowRegOp.getReg(), SystemZ::subreg_low));
// The address in the first (high) instruction is already correct.
// Adjust the offset in the second (low) instruction.
MachineOperand &HighOffsetOp = EarlierMI->getOperand(2);
MachineOperand &LowOffsetOp = MI->getOperand(2);
LowOffsetOp.setImm(LowOffsetOp.getImm() + 8);
// Set the opcodes.
unsigned HighOpcode = getOpcodeForOffset(NewOpcode, HighOffsetOp.getImm());
unsigned LowOpcode = getOpcodeForOffset(NewOpcode, LowOffsetOp.getImm());
assert(HighOpcode && LowOpcode && "Both offsets should be in range");
EarlierMI->setDesc(get(HighOpcode));
MI->setDesc(get(LowOpcode));
}
// Split ADJDYNALLOC instruction MI.
void SystemZInstrInfo::splitAdjDynAlloc(MachineBasicBlock::iterator MI) const {
MachineBasicBlock *MBB = MI->getParent();
MachineFunction &MF = *MBB->getParent();
MachineFrameInfo *MFFrame = MF.getFrameInfo();
MachineOperand &OffsetMO = MI->getOperand(2);
uint64_t Offset = (MFFrame->getMaxCallFrameSize() +
SystemZMC::CallFrameSize +
OffsetMO.getImm());
unsigned NewOpcode = getOpcodeForOffset(SystemZ::LA, Offset);
assert(NewOpcode && "No support for huge argument lists yet");
MI->setDesc(get(NewOpcode));
OffsetMO.setImm(Offset);
}
// If MI is a simple load or store for a frame object, return the register
// it loads or stores and set FrameIndex to the index of the frame object.
// Return 0 otherwise.
//
// Flag is SimpleBDXLoad for loads and SimpleBDXStore for stores.
static int isSimpleMove(const MachineInstr *MI, int &FrameIndex, int Flag) {
const MCInstrDesc &MCID = MI->getDesc();
if ((MCID.TSFlags & Flag) &&
MI->getOperand(1).isFI() &&
MI->getOperand(2).getImm() == 0 &&
MI->getOperand(3).getReg() == 0) {
FrameIndex = MI->getOperand(1).getIndex();
return MI->getOperand(0).getReg();
}
return 0;
}
unsigned SystemZInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
return isSimpleMove(MI, FrameIndex, SystemZII::SimpleBDXLoad);
}
unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
return isSimpleMove(MI, FrameIndex, SystemZII::SimpleBDXStore);
}
bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
// Most of the code and comments here are boilerplate.
// Start from the bottom of the block and work up, examining the
// terminator instructions.
MachineBasicBlock::iterator I = MBB.end();
while (I != MBB.begin()) {
--I;
if (I->isDebugValue())
continue;
// Working from the bottom, when we see a non-terminator instruction, we're
// done.
if (!isUnpredicatedTerminator(I))
break;
// A terminator that isn't a branch can't easily be handled by this
// analysis.
unsigned ThisCond;
const MachineOperand *ThisTarget;
if (!isBranch(I, ThisCond, ThisTarget))
return true;
// Can't handle indirect branches.
if (!ThisTarget->isMBB())
return true;
if (ThisCond == SystemZ::CCMASK_ANY) {
// Handle unconditional branches.
if (!AllowModify) {
TBB = ThisTarget->getMBB();
continue;
}
// If the block has any instructions after a JMP, delete them.
while (llvm::next(I) != MBB.end())
llvm::next(I)->eraseFromParent();
Cond.clear();
FBB = 0;
// Delete the JMP if it's equivalent to a fall-through.
if (MBB.isLayoutSuccessor(ThisTarget->getMBB())) {
TBB = 0;
I->eraseFromParent();
I = MBB.end();
continue;
}
// TBB is used to indicate the unconditinal destination.
TBB = ThisTarget->getMBB();
continue;
}
// Working from the bottom, handle the first conditional branch.
if (Cond.empty()) {
// FIXME: add X86-style branch swap
FBB = TBB;
TBB = ThisTarget->getMBB();
Cond.push_back(MachineOperand::CreateImm(ThisCond));
continue;
}
// Handle subsequent conditional branches.
assert(Cond.size() == 1);
assert(TBB);
// Only handle the case where all conditional branches branch to the same
// destination.
if (TBB != ThisTarget->getMBB())
return true;
// If the conditions are the same, we can leave them alone.
unsigned OldCond = Cond[0].getImm();
if (OldCond == ThisCond)
continue;
// FIXME: Try combining conditions like X86 does. Should be easy on Z!
}
return false;
}
unsigned SystemZInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
// Most of the code and comments here are boilerplate.
MachineBasicBlock::iterator I = MBB.end();
unsigned Count = 0;
while (I != MBB.begin()) {
--I;
if (I->isDebugValue())
continue;
unsigned Cond;
const MachineOperand *Target;
if (!isBranch(I, Cond, Target))
break;
if (!Target->isMBB())
break;
// Remove the branch.
I->eraseFromParent();
I = MBB.end();
++Count;
}
return Count;
}
unsigned
SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const {
// In this function we output 32-bit branches, which should always
// have enough range. They can be shortened and relaxed by later code
// in the pipeline, if desired.
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 1 || Cond.size() == 0) &&
"SystemZ branch conditions have one component!");
if (Cond.empty()) {
// Unconditional branch?
assert(!FBB && "Unconditional branch with multiple successors!");
[SystemZ] Add long branch pass Before this change, the SystemZ backend would use BRCL for all branches and only consider shortening them to BRC when generating an object file. E.g. a branch on equal would use the JGE alias of BRCL in assembly output, but might be shortened to the JE alias of BRC in ELF output. This was a useful first step, but it had two problems: (1) The z assembler isn't traditionally supposed to perform branch shortening or branch relaxation. We followed this rule by not relaxing branches in assembler input, but that meant that generating assembly code and then assembling it would not produce the same result as going directly to object code; the former would give long branches everywhere, whereas the latter would use short branches where possible. (2) Other useful branches, like COMPARE AND BRANCH, do not have long forms. We would need to do something else before supporting them. (Although COMPARE AND BRANCH does not change the condition codes, the plan is to model COMPARE AND BRANCH as a CC-clobbering instruction during codegen, so that we can safely lower it to a separate compare and long branch where necessary. This is not a valid transformation for the assembler proper to make.) This patch therefore moves branch relaxation to a pre-emit pass. For now, calls are still shortened from BRASL to BRAS by the assembler, although this too is not really the traditional behaviour. The first test takes about 1.5s to run, and there are likely to be more tests in this vein once further branch types are added. The feeling on IRC was that 1.5s is a bit much for a single test, so I've restricted it to SystemZ hosts for now. The patch exposes (and fixes) some typos in the main CodeGen/SystemZ tests. A later patch will remove the {{g}}s from that directory. llvm-svn: 182274
2013-05-20 22:23:08 +08:00
BuildMI(&MBB, DL, get(SystemZ::J)).addMBB(TBB);
return 1;
}
// Conditional branch.
unsigned Count = 0;
unsigned CC = Cond[0].getImm();
[SystemZ] Add long branch pass Before this change, the SystemZ backend would use BRCL for all branches and only consider shortening them to BRC when generating an object file. E.g. a branch on equal would use the JGE alias of BRCL in assembly output, but might be shortened to the JE alias of BRC in ELF output. This was a useful first step, but it had two problems: (1) The z assembler isn't traditionally supposed to perform branch shortening or branch relaxation. We followed this rule by not relaxing branches in assembler input, but that meant that generating assembly code and then assembling it would not produce the same result as going directly to object code; the former would give long branches everywhere, whereas the latter would use short branches where possible. (2) Other useful branches, like COMPARE AND BRANCH, do not have long forms. We would need to do something else before supporting them. (Although COMPARE AND BRANCH does not change the condition codes, the plan is to model COMPARE AND BRANCH as a CC-clobbering instruction during codegen, so that we can safely lower it to a separate compare and long branch where necessary. This is not a valid transformation for the assembler proper to make.) This patch therefore moves branch relaxation to a pre-emit pass. For now, calls are still shortened from BRASL to BRAS by the assembler, although this too is not really the traditional behaviour. The first test takes about 1.5s to run, and there are likely to be more tests in this vein once further branch types are added. The feeling on IRC was that 1.5s is a bit much for a single test, so I've restricted it to SystemZ hosts for now. The patch exposes (and fixes) some typos in the main CodeGen/SystemZ tests. A later patch will remove the {{g}}s from that directory. llvm-svn: 182274
2013-05-20 22:23:08 +08:00
BuildMI(&MBB, DL, get(SystemZ::BRC)).addImm(CC).addMBB(TBB);
++Count;
if (FBB) {
// Two-way Conditional branch. Insert the second branch.
[SystemZ] Add long branch pass Before this change, the SystemZ backend would use BRCL for all branches and only consider shortening them to BRC when generating an object file. E.g. a branch on equal would use the JGE alias of BRCL in assembly output, but might be shortened to the JE alias of BRC in ELF output. This was a useful first step, but it had two problems: (1) The z assembler isn't traditionally supposed to perform branch shortening or branch relaxation. We followed this rule by not relaxing branches in assembler input, but that meant that generating assembly code and then assembling it would not produce the same result as going directly to object code; the former would give long branches everywhere, whereas the latter would use short branches where possible. (2) Other useful branches, like COMPARE AND BRANCH, do not have long forms. We would need to do something else before supporting them. (Although COMPARE AND BRANCH does not change the condition codes, the plan is to model COMPARE AND BRANCH as a CC-clobbering instruction during codegen, so that we can safely lower it to a separate compare and long branch where necessary. This is not a valid transformation for the assembler proper to make.) This patch therefore moves branch relaxation to a pre-emit pass. For now, calls are still shortened from BRASL to BRAS by the assembler, although this too is not really the traditional behaviour. The first test takes about 1.5s to run, and there are likely to be more tests in this vein once further branch types are added. The feeling on IRC was that 1.5s is a bit much for a single test, so I've restricted it to SystemZ hosts for now. The patch exposes (and fixes) some typos in the main CodeGen/SystemZ tests. A later patch will remove the {{g}}s from that directory. llvm-svn: 182274
2013-05-20 22:23:08 +08:00
BuildMI(&MBB, DL, get(SystemZ::J)).addMBB(FBB);
++Count;
}
return Count;
}
void
SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
// Split 128-bit GPR moves into two 64-bit moves. This handles ADDR128 too.
if (SystemZ::GR128BitRegClass.contains(DestReg, SrcReg)) {
copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_high),
RI.getSubReg(SrcReg, SystemZ::subreg_high), KillSrc);
copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_low),
RI.getSubReg(SrcReg, SystemZ::subreg_low), KillSrc);
return;
}
// Everything else needs only one instruction.
unsigned Opcode;
if (SystemZ::GR32BitRegClass.contains(DestReg, SrcReg))
Opcode = SystemZ::LR;
else if (SystemZ::GR64BitRegClass.contains(DestReg, SrcReg))
Opcode = SystemZ::LGR;
else if (SystemZ::FP32BitRegClass.contains(DestReg, SrcReg))
Opcode = SystemZ::LER;
else if (SystemZ::FP64BitRegClass.contains(DestReg, SrcReg))
Opcode = SystemZ::LDR;
else if (SystemZ::FP128BitRegClass.contains(DestReg, SrcReg))
Opcode = SystemZ::LXR;
else
llvm_unreachable("Impossible reg-to-reg copy");
BuildMI(MBB, MBBI, DL, get(Opcode), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
}
void
SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill,
int FrameIdx,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
// Callers may expect a single instruction, so keep 128-bit moves
// together for now and lower them after register allocation.
unsigned LoadOpcode, StoreOpcode;
getLoadStoreOpcodes(RC, LoadOpcode, StoreOpcode);
addFrameReference(BuildMI(MBB, MBBI, DL, get(StoreOpcode))
.addReg(SrcReg, getKillRegState(isKill)), FrameIdx);
}
void
SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
// Callers may expect a single instruction, so keep 128-bit moves
// together for now and lower them after register allocation.
unsigned LoadOpcode, StoreOpcode;
getLoadStoreOpcodes(RC, LoadOpcode, StoreOpcode);
addFrameReference(BuildMI(MBB, MBBI, DL, get(LoadOpcode), DestReg),
FrameIdx);
}
bool
SystemZInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
switch (MI->getOpcode()) {
case SystemZ::L128:
splitMove(MI, SystemZ::LG);
return true;
case SystemZ::ST128:
splitMove(MI, SystemZ::STG);
return true;
case SystemZ::LX:
splitMove(MI, SystemZ::LD);
return true;
case SystemZ::STX:
splitMove(MI, SystemZ::STD);
return true;
case SystemZ::ADJDYNALLOC:
splitAdjDynAlloc(MI);
return true;
default:
return false;
}
}
bool SystemZInstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
assert(Cond.size() == 1 && "Invalid branch condition!");
Cond[0].setImm(Cond[0].getImm() ^ SystemZ::CCMASK_ANY);
return false;
}
[SystemZ] Add long branch pass Before this change, the SystemZ backend would use BRCL for all branches and only consider shortening them to BRC when generating an object file. E.g. a branch on equal would use the JGE alias of BRCL in assembly output, but might be shortened to the JE alias of BRC in ELF output. This was a useful first step, but it had two problems: (1) The z assembler isn't traditionally supposed to perform branch shortening or branch relaxation. We followed this rule by not relaxing branches in assembler input, but that meant that generating assembly code and then assembling it would not produce the same result as going directly to object code; the former would give long branches everywhere, whereas the latter would use short branches where possible. (2) Other useful branches, like COMPARE AND BRANCH, do not have long forms. We would need to do something else before supporting them. (Although COMPARE AND BRANCH does not change the condition codes, the plan is to model COMPARE AND BRANCH as a CC-clobbering instruction during codegen, so that we can safely lower it to a separate compare and long branch where necessary. This is not a valid transformation for the assembler proper to make.) This patch therefore moves branch relaxation to a pre-emit pass. For now, calls are still shortened from BRASL to BRAS by the assembler, although this too is not really the traditional behaviour. The first test takes about 1.5s to run, and there are likely to be more tests in this vein once further branch types are added. The feeling on IRC was that 1.5s is a bit much for a single test, so I've restricted it to SystemZ hosts for now. The patch exposes (and fixes) some typos in the main CodeGen/SystemZ tests. A later patch will remove the {{g}}s from that directory. llvm-svn: 182274
2013-05-20 22:23:08 +08:00
uint64_t SystemZInstrInfo::getInstSizeInBytes(const MachineInstr *MI) const {
if (MI->getOpcode() == TargetOpcode::INLINEASM) {
const MachineFunction *MF = MI->getParent()->getParent();
const char *AsmStr = MI->getOperand(0).getSymbolName();
return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
}
return MI->getDesc().getSize();
}
bool SystemZInstrInfo::isBranch(const MachineInstr *MI, unsigned &Cond,
const MachineOperand *&Target) const {
switch (MI->getOpcode()) {
case SystemZ::BR:
case SystemZ::J:
case SystemZ::JG:
Cond = SystemZ::CCMASK_ANY;
Target = &MI->getOperand(0);
return true;
case SystemZ::BRC:
case SystemZ::BRCL:
Cond = MI->getOperand(0).getImm();
Target = &MI->getOperand(1);
return true;
default:
assert(!MI->getDesc().isBranch() && "Unknown branch opcode");
return false;
}
}
void SystemZInstrInfo::getLoadStoreOpcodes(const TargetRegisterClass *RC,
unsigned &LoadOpcode,
unsigned &StoreOpcode) const {
if (RC == &SystemZ::GR32BitRegClass || RC == &SystemZ::ADDR32BitRegClass) {
LoadOpcode = SystemZ::L;
StoreOpcode = SystemZ::ST32;
} else if (RC == &SystemZ::GR64BitRegClass ||
RC == &SystemZ::ADDR64BitRegClass) {
LoadOpcode = SystemZ::LG;
StoreOpcode = SystemZ::STG;
} else if (RC == &SystemZ::GR128BitRegClass ||
RC == &SystemZ::ADDR128BitRegClass) {
LoadOpcode = SystemZ::L128;
StoreOpcode = SystemZ::ST128;
} else if (RC == &SystemZ::FP32BitRegClass) {
LoadOpcode = SystemZ::LE;
StoreOpcode = SystemZ::STE;
} else if (RC == &SystemZ::FP64BitRegClass) {
LoadOpcode = SystemZ::LD;
StoreOpcode = SystemZ::STD;
} else if (RC == &SystemZ::FP128BitRegClass) {
LoadOpcode = SystemZ::LX;
StoreOpcode = SystemZ::STX;
} else
llvm_unreachable("Unsupported regclass to load or store");
}
unsigned SystemZInstrInfo::getOpcodeForOffset(unsigned Opcode,
int64_t Offset) const {
const MCInstrDesc &MCID = get(Opcode);
int64_t Offset2 = (MCID.TSFlags & SystemZII::Is128Bit ? Offset + 8 : Offset);
if (isUInt<12>(Offset) && isUInt<12>(Offset2)) {
// Get the instruction to use for unsigned 12-bit displacements.
int Disp12Opcode = SystemZ::getDisp12Opcode(Opcode);
if (Disp12Opcode >= 0)
return Disp12Opcode;
// All address-related instructions can use unsigned 12-bit
// displacements.
return Opcode;
}
if (isInt<20>(Offset) && isInt<20>(Offset2)) {
// Get the instruction to use for signed 20-bit displacements.
int Disp20Opcode = SystemZ::getDisp20Opcode(Opcode);
if (Disp20Opcode >= 0)
return Disp20Opcode;
// Check whether Opcode allows signed 20-bit displacements.
if (MCID.TSFlags & SystemZII::Has20BitOffset)
return Opcode;
}
return 0;
}
void SystemZInstrInfo::loadImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned Reg, uint64_t Value) const {
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
unsigned Opcode;
if (isInt<16>(Value))
Opcode = SystemZ::LGHI;
else if (SystemZ::isImmLL(Value))
Opcode = SystemZ::LLILL;
else if (SystemZ::isImmLH(Value)) {
Opcode = SystemZ::LLILH;
Value >>= 16;
} else {
assert(isInt<32>(Value) && "Huge values not handled yet");
Opcode = SystemZ::LGFI;
}
BuildMI(MBB, MBBI, DL, get(Opcode), Reg).addImm(Value);
}