[PowerPC] Ensure displacements for DQ-Form instructions are multiples of 16

As outlined in the PR, we didn't ensure that displacements for DQ-Form
instructions are multiples of 16. Since the instruction encoding encodes
a quad-word displacement, a sub-16 byte displacement is meaningless and
ends up being encoded incorrectly.

Fixes https://bugs.llvm.org/show_bug.cgi?id=33671.

Differential Revision: https://reviews.llvm.org/D35007

llvm-svn: 307934
This commit is contained in:
Nemanja Ivanovic 2017-07-13 18:17:10 +00:00
parent 502de22fda
commit 3c7e276d24
12 changed files with 209 additions and 116 deletions

View File

@ -271,7 +271,8 @@ unsigned PPCMCCodeEmitter::getMemRIX16Encoding(const MCInst &MI, unsigned OpNo,
unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 12;
const MCOperand &MO = MI.getOperand(OpNo);
assert(MO.isImm());
assert(MO.isImm() && !(MO.getImm() % 16) &&
"Expecting an immediate that is a multiple of 16");
return ((getMachineOpValue(MI, MO, Fixups, STI) >> 4) & 0xFFF) | RegBits;
}

View File

@ -178,7 +178,7 @@ namespace {
/// a base register plus a signed 16-bit displacement [r+imm].
bool SelectAddrImm(SDValue N, SDValue &Disp,
SDValue &Base) {
return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, false);
return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0);
}
/// SelectAddrImmOffs - Return true if the operand is valid for a preinc
@ -211,7 +211,11 @@ namespace {
/// a base register plus a signed 16-bit displacement that is a multiple of 4.
/// Suitable for use by STD and friends.
bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, true);
return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 4);
}
bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 16);
}
// Select an address into a single register.
@ -305,6 +309,7 @@ private:
bool AllUsersSelectZero(SDNode *N);
void SwapAllSelectUsers(SDNode *N);
bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
void transferMemOperands(SDNode *N, SDNode *Result);
};
@ -2999,6 +3004,25 @@ SDValue PPCDAGToDAGISel::getSETCCInGPR(SDValue Compare,
return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
}
/// Does this node represent a load/store node whose address can be represented
/// with a register plus an immediate that's a multiple of \p Val:
bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
LoadSDNode *LDN = dyn_cast<LoadSDNode>(N);
StoreSDNode *STN = dyn_cast<StoreSDNode>(N);
SDValue AddrOp;
if (LDN)
AddrOp = LDN->getOperand(1);
else if (STN)
AddrOp = STN->getOperand(2);
short Imm = 0;
if (AddrOp.getOpcode() == ISD::ADD)
return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val);
// If the address comes from the outside, the offset will be zero.
return AddrOp.getOpcode() == ISD::CopyFromReg;
}
void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
// Transfer memoperands.
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);

View File

@ -2130,12 +2130,12 @@ static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg. If Aligned is true, only accept displacements
/// suitable for STD and friends, i.e. multiples of 4.
/// represented as reg+reg. If \p Alignment is non-zero, only accept
/// displacements that are multiples of that value.
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
SDValue &Base,
SelectionDAG &DAG,
bool Aligned) const {
unsigned Alignment) const {
// FIXME dl should come from parent load or store, not from address
SDLoc dl(N);
// If this can be more profitably realized as r+r, fail.
@ -2145,7 +2145,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
if (N.getOpcode() == ISD::ADD) {
int16_t imm = 0;
if (isIntS16Immediate(N.getOperand(1), imm) &&
(!Aligned || (imm & 3) == 0)) {
(!Alignment || (imm % Alignment) == 0)) {
Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
@ -2169,7 +2169,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
} else if (N.getOpcode() == ISD::OR) {
int16_t imm = 0;
if (isIntS16Immediate(N.getOperand(1), imm) &&
(!Aligned || (imm & 3) == 0)) {
(!Alignment || (imm % Alignment) == 0)) {
// If this is an or of disjoint bitfields, we can codegen this as an add
// (for better address arithmetic) if the LHS and RHS of the OR are
// provably disjoint.
@ -2196,7 +2196,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
// If this address fits entirely in a 16-bit sext immediate field, codegen
// this as "d, 0"
int16_t Imm;
if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
if (isIntS16Immediate(CN, Imm) && (!Alignment || (Imm % Alignment) == 0)) {
Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
@ -2206,7 +2206,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
// Handle 32-bit sext immediates with LIS + addr mode.
if ((CN->getValueType(0) == MVT::i32 ||
(int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
(!Aligned || (CN->getZExtValue() & 3) == 0)) {
(!Alignment || (CN->getZExtValue() % Alignment) == 0)) {
int Addr = (int)CN->getZExtValue();
// Otherwise, break this down into an LIS + disp.
@ -2321,14 +2321,14 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
// LDU/STU can only handle immediates that are a multiple of 4.
if (VT != MVT::i64) {
if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 0))
return false;
} else {
// LDU/STU need an address with at least 4-byte alignment.
if (Alignment < 4)
return false;
if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 4))
return false;
}

View File

@ -616,7 +616,7 @@ namespace llvm {
/// is not better represented as reg+reg. If Aligned is true, only accept
/// displacements suitable for STD and friends, i.e. multiples of 4.
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
SelectionDAG &DAG, bool Aligned) const;
SelectionDAG &DAG, unsigned Alignment) const;
/// SelectAddressRegRegOnly - Given the specified addressed, force it to be
/// represented as an indexed [r+r] operation.

View File

@ -405,6 +405,25 @@ def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() < 4;
}]>;
// This is a somewhat weaker condition than actually checking for 16-byte
// alignment. It is simply checking that the displacement can be represented
// as an immediate that is a multiple of 16 (i.e. the requirements for DQ-Form
// instructions).
def quadwOffsetLoad : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return isOffsetMultipleOf(N, 16);
}]>;
def quadwOffsetStore : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
return isOffsetMultipleOf(N, 16);
}]>;
def nonQuadwOffsetLoad : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return !isOffsetMultipleOf(N, 16);
}]>;
def nonQuadwOffsetStore : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
return !isOffsetMultipleOf(N, 16);
}]>;
//===----------------------------------------------------------------------===//
// PowerPC Flag Definitions.
@ -815,7 +834,8 @@ def pred : Operand<OtherVT> {
def iaddr : ComplexPattern<iPTR, 2, "SelectAddrImm", [], []>;
def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>;
def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>;
def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX4", [], []>; // "std"
def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX4", [], []>; // "std"
def iqaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX16", [], []>; // "stxv"
// The address in a single register. This is used with the SjLj
// pseudo-instructions.

View File

@ -2606,37 +2606,41 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
} // IsLittleEndian, HasP9Vector
// D-Form Load/Store
def : Pat<(v4i32 (load iaddr:$src)), (LXV memrix16:$src)>;
def : Pat<(v4f32 (load iaddr:$src)), (LXV memrix16:$src)>;
def : Pat<(v2i64 (load iaddr:$src)), (LXV memrix16:$src)>;
def : Pat<(v2f64 (load iaddr:$src)), (LXV memrix16:$src)>;
def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iaddr:$src)), (LXV memrix16:$src)>;
def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iaddr:$src)), (LXV memrix16:$src)>;
def : Pat<(v4i32 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
def : Pat<(v4f32 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
def : Pat<(v2i64 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
def : Pat<(v2f64 (quadwOffsetLoad iqaddr:$src)), (LXV memrix16:$src)>;
def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iqaddr:$src)), (LXV memrix16:$src)>;
def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iqaddr:$src)), (LXV memrix16:$src)>;
def : Pat<(store v4f32:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>;
def : Pat<(store v4i32:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>;
def : Pat<(store v2f64:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>;
def : Pat<(store v2i64:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>;
def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iaddr:$dst),
def : Pat<(quadwOffsetStore v4f32:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
def : Pat<(quadwOffsetStore v4i32:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
def : Pat<(quadwOffsetStore v2f64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
def : Pat<(quadwOffsetStore v2i64:$rS, iqaddr:$dst), (STXV $rS, memrix16:$dst)>;
def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iqaddr:$dst),
(STXV $rS, memrix16:$dst)>;
def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iaddr:$dst),
def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iqaddr:$dst),
(STXV $rS, memrix16:$dst)>;
def : Pat<(v2f64 (load xaddr:$src)), (LXVX xaddr:$src)>;
def : Pat<(v2i64 (load xaddr:$src)), (LXVX xaddr:$src)>;
def : Pat<(v4f32 (load xaddr:$src)), (LXVX xaddr:$src)>;
def : Pat<(v4i32 (load xaddr:$src)), (LXVX xaddr:$src)>;
def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xaddr:$src)), (LXVX xaddr:$src)>;
def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xaddr:$src)), (LXVX xaddr:$src)>;
def : Pat<(store v2f64:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>;
def : Pat<(store v2i64:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>;
def : Pat<(store v4f32:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>;
def : Pat<(store v4i32:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>;
def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xaddr:$dst),
(STXVX $rS, xaddr:$dst)>;
def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xaddr:$dst),
(STXVX $rS, xaddr:$dst)>;
def : Pat<(v2f64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
def : Pat<(v2i64 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
def : Pat<(v4f32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
def : Pat<(v4i32 (nonQuadwOffsetLoad xoaddr:$src)), (LXVX xoaddr:$src)>;
def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVX xoaddr:$src)>;
def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xoaddr:$src)), (LXVX xoaddr:$src)>;
def : Pat<(nonQuadwOffsetStore v2f64:$rS, xoaddr:$dst),
(STXVX $rS, xoaddr:$dst)>;
def : Pat<(nonQuadwOffsetStore v2i64:$rS, xoaddr:$dst),
(STXVX $rS, xoaddr:$dst)>;
def : Pat<(nonQuadwOffsetStore v4f32:$rS, xoaddr:$dst),
(STXVX $rS, xoaddr:$dst)>;
def : Pat<(nonQuadwOffsetStore v4i32:$rS, xoaddr:$dst),
(STXVX $rS, xoaddr:$dst)>;
def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
(STXVX $rS, xoaddr:$dst)>;
def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
(STXVX $rS, xoaddr:$dst)>;
def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
(v4i32 (LXVWSX xoaddr:$src))>;
def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
@ -2788,21 +2792,21 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
let isPseudo = 1 in {
def DFLOADf32 : Pseudo<(outs vssrc:$XT), (ins memrix:$src),
"#DFLOADf32",
[(set f32:$XT, (load iaddr:$src))]>;
[(set f32:$XT, (load ixaddr:$src))]>;
def DFLOADf64 : Pseudo<(outs vsfrc:$XT), (ins memrix:$src),
"#DFLOADf64",
[(set f64:$XT, (load iaddr:$src))]>;
[(set f64:$XT, (load ixaddr:$src))]>;
def DFSTOREf32 : Pseudo<(outs), (ins vssrc:$XT, memrix:$dst),
"#DFSTOREf32",
[(store f32:$XT, iaddr:$dst)]>;
[(store f32:$XT, ixaddr:$dst)]>;
def DFSTOREf64 : Pseudo<(outs), (ins vsfrc:$XT, memrix:$dst),
"#DFSTOREf64",
[(store f64:$XT, iaddr:$dst)]>;
[(store f64:$XT, ixaddr:$dst)]>;
}
def : Pat<(f64 (extloadf32 iaddr:$src)),
(COPY_TO_REGCLASS (DFLOADf32 iaddr:$src), VSFRC)>;
def : Pat<(f32 (fpround (extloadf32 iaddr:$src))),
(f32 (DFLOADf32 iaddr:$src))>;
def : Pat<(f64 (extloadf32 ixaddr:$src)),
(COPY_TO_REGCLASS (DFLOADf32 ixaddr:$src), VSFRC)>;
def : Pat<(f32 (fpround (extloadf32 ixaddr:$src))),
(f32 (DFLOADf32 ixaddr:$src))>;
} // end HasP9Vector, AddedComplexity
// Integer extend helper dags 32 -> 64
@ -2881,13 +2885,13 @@ def FltToLongLoad {
dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 xoaddr:$A)))));
}
def FltToLongLoadP9 {
dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 iaddr:$A)))));
dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 ixaddr:$A)))));
}
def FltToULongLoad {
dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 xoaddr:$A)))));
}
def FltToULongLoadP9 {
dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 iaddr:$A)))));
dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 ixaddr:$A)))));
}
def FltToLong {
dag A = (i64 (PPCmfvsr (PPCfctidz (fpextend f32:$A))));
@ -2911,13 +2915,13 @@ def DblToIntLoad {
dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load xoaddr:$A)))));
}
def DblToIntLoadP9 {
dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load iaddr:$A)))));
dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load ixaddr:$A)))));
}
def DblToUIntLoad {
dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load xoaddr:$A)))));
}
def DblToUIntLoadP9 {
dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load iaddr:$A)))));
dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load ixaddr:$A)))));
}
def DblToLongLoad {
dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (load xoaddr:$A)))));
@ -3088,17 +3092,17 @@ let AddedComplexity = 400 in {
(v4i32 (XVCVSPUXWS (LXVWSX xoaddr:$A)))>;
def : Pat<(v4i32 (scalar_to_vector DblToIntLoadP9.A)),
(v4i32 (XXSPLTW (COPY_TO_REGCLASS
(XSCVDPSXWS (DFLOADf64 iaddr:$A)), VSRC), 1))>;
(XSCVDPSXWS (DFLOADf64 ixaddr:$A)), VSRC), 1))>;
def : Pat<(v4i32 (scalar_to_vector DblToUIntLoadP9.A)),
(v4i32 (XXSPLTW (COPY_TO_REGCLASS
(XSCVDPUXWS (DFLOADf64 iaddr:$A)), VSRC), 1))>;
(XSCVDPUXWS (DFLOADf64 ixaddr:$A)), VSRC), 1))>;
def : Pat<(v2i64 (scalar_to_vector FltToLongLoadP9.A)),
(v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
(DFLOADf32 iaddr:$A),
(DFLOADf32 ixaddr:$A),
VSFRC)), 0))>;
def : Pat<(v2i64 (scalar_to_vector FltToULongLoadP9.A)),
(v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
(DFLOADf32 iaddr:$A),
(DFLOADf32 ixaddr:$A),
VSFRC)), 0))>;
}

View File

@ -754,19 +754,31 @@ bool PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
return false;
}
// Figure out if the offset in the instruction must be a multiple of 4.
// This is true for instructions like "STD".
static bool usesIXAddr(const MachineInstr &MI) {
// If the offset must be a multiple of some value, return what that value is.
static unsigned offsetMinAlign(const MachineInstr &MI) {
unsigned OpC = MI.getOpcode();
switch (OpC) {
default:
return false;
return 1;
case PPC::LWA:
case PPC::LWA_32:
case PPC::LD:
case PPC::LDU:
case PPC::STD:
return true;
case PPC::STDU:
case PPC::DFLOADf32:
case PPC::DFLOADf64:
case PPC::DFSTOREf32:
case PPC::DFSTOREf64:
case PPC::LXSD:
case PPC::LXSSP:
case PPC::STXSD:
case PPC::STXSSP:
return 4;
case PPC::LXV:
case PPC::STXV:
return 16;
}
}
@ -852,9 +864,6 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(FIOperandNum).ChangeToRegister(
FrameIndex < 0 ? getBaseRegister(MF) : getFrameRegister(MF), false);
// Figure out if the offset in the instruction is shifted right two bits.
bool isIXAddr = usesIXAddr(MI);
// If the instruction is not present in ImmToIdxMap, then it has no immediate
// form (and must be r+r).
bool noImmForm = !MI.isInlineAsm() && OpC != TargetOpcode::STACKMAP &&
@ -883,7 +892,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// happen in invalid code.
assert(OpC != PPC::DBG_VALUE &&
"This should be handled in a target-independent way");
if (!noImmForm && ((isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) ||
if (!noImmForm && ((isInt<16>(Offset) &&
((Offset % offsetMinAlign(MI)) == 0)) ||
OpC == TargetOpcode::STACKMAP ||
OpC == TargetOpcode::PATCHPOINT)) {
MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
@ -1076,5 +1086,5 @@ bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
return MI->getOpcode() == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm
MI->getOpcode() == TargetOpcode::STACKMAP ||
MI->getOpcode() == TargetOpcode::PATCHPOINT ||
(isInt<16>(Offset) && (!usesIXAddr(*MI) || (Offset & 3) == 0));
(isInt<16>(Offset) && (Offset % offsetMinAlign(*MI)) == 0);
}

View File

@ -0,0 +1,32 @@
; Function Attrs: norecurse nounwind
; RUN: llc -mtriple=powerpc64le-unknown-unknown -mcpu=pwr9 < %s | FileCheck %s
define void @test1(i32* nocapture readonly %arr, i32* nocapture %arrTo) {
entry:
%arrayidx = getelementptr inbounds i32, i32* %arrTo, i64 4
%0 = bitcast i32* %arrayidx to <4 x i32>*
%arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 4
%1 = bitcast i32* %arrayidx1 to <4 x i32>*
%2 = load <4 x i32>, <4 x i32>* %1, align 16
store <4 x i32> %2, <4 x i32>* %0, align 16
ret void
; CHECK-LABEL: test1
; CHECK: lxv [[LD:[0-9]+]], 16(3)
; CHECK: stxv [[LD]], 16(4)
}
; Function Attrs: norecurse nounwind
define void @test2(i32* nocapture readonly %arr, i32* nocapture %arrTo) {
entry:
%arrayidx = getelementptr inbounds i32, i32* %arrTo, i64 1
%0 = bitcast i32* %arrayidx to <4 x i32>*
%arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 2
%1 = bitcast i32* %arrayidx1 to <4 x i32>*
%2 = load <4 x i32>, <4 x i32>* %1, align 16
store <4 x i32> %2, <4 x i32>* %0, align 16
ret void
; CHECK-LABEL: test2
; CHECK: addi 3, 3, 8
; CHECK: lxvx [[LD:[0-9]+]], 0, 3
; CHECK: addi 3, 4, 4
; CHECK: stxvx [[LD]], 0, 3
}

View File

@ -1018,13 +1018,13 @@ entry:
; P8BE-LABEL: fromDiffMemVarDi
; P8LE-LABEL: fromDiffMemVarDi
; P9BE: sldi {{r[0-9]+}}, r4, 2
; P9BE-DAG: lxv {{v[0-9]+}}
; P9BE-DAG: lxv
; P9BE-DAG: lxvx {{v[0-9]+}}
; P9BE-DAG: lxvx
; P9BE: vperm
; P9BE: blr
; P9LE: sldi {{r[0-9]+}}, r4, 2
; P9LE-DAG: lxv {{v[0-9]+}}
; P9LE-DAG: lxv
; P9LE-DAG: lxvx {{v[0-9]+}}
; P9LE-DAG: lxvx
; P9LE: vperm
; P9LE: blr
; P8BE: sldi {{r[0-9]+}}, r4, 2
@ -1584,16 +1584,16 @@ entry:
; P9LE-LABEL: fromDiffMemConsAConvdtoi
; P8BE-LABEL: fromDiffMemConsAConvdtoi
; P8LE-LABEL: fromDiffMemConsAConvdtoi
; P9BE: lxv [[REG1:[vs0-9]+]], 0(r3)
; P9BE: lxv [[REG2:[vs0-9]+]], 16(r3)
; P9BE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3)
; P9BE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3)
; P9BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]]
; P9BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]]
; P9BE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]]
; P9BE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]]
; P9BE: vmrgew v2, [[REG6]], [[REG5]]
; P9BE: xvcvspsxws v2, v2
; P9LE: lxv [[REG1:[vs0-9]+]], 0(r3)
; P9LE: lxv [[REG2:[vs0-9]+]], 16(r3)
; P9LE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3)
; P9LE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3)
; P9LE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG2]], [[REG1]]
; P9LE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG2]], [[REG1]]
; P9LE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]]
@ -2177,12 +2177,14 @@ entry:
; P8BE-LABEL: fromDiffMemVarDui
; P8LE-LABEL: fromDiffMemVarDui
; P9BE-DAG: sldi {{r[0-9]+}}, r4, 2
; P9BE-DAG: lxv {{v[0-9]+}}, -12(r3)
; P9BE-DAG: lxv
; P9BE-DAG: addi r3, r3, -12
; P9BE-DAG: lxvx {{v[0-9]+}}, 0, r3
; P9BE-DAG: lxvx
; P9BE: vperm
; P9BE: blr
; P9LE-DAG: sldi {{r[0-9]+}}, r4, 2
; P9LE-DAG: lxv {{v[0-9]+}}, -12(r3)
; P9LE-DAG: addi r3, r3, -12
; P9LE-DAG: lxvx {{v[0-9]+}}, 0, r3
; P9LE-DAG: lxv
; P9LE: vperm
; P9LE: blr
@ -2742,16 +2744,16 @@ entry:
; P9LE-LABEL: fromDiffMemConsAConvdtoui
; P8BE-LABEL: fromDiffMemConsAConvdtoui
; P8LE-LABEL: fromDiffMemConsAConvdtoui
; P9BE: lxv [[REG1:[vs0-9]+]], 0(r3)
; P9BE: lxv [[REG2:[vs0-9]+]], 16(r3)
; P9BE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3)
; P9BE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3)
; P9BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]]
; P9BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]]
; P9BE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]]
; P9BE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]]
; P9BE: vmrgew v2, [[REG6]], [[REG5]]
; P9BE: xvcvspuxws v2, v2
; P9LE: lxv [[REG1:[vs0-9]+]], 0(r3)
; P9LE: lxv [[REG2:[vs0-9]+]], 16(r3)
; P9LE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3)
; P9LE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3)
; P9LE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG2]], [[REG1]]
; P9LE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG2]], [[REG1]]
; P9LE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]]
@ -3466,9 +3468,9 @@ entry:
; P9LE-LABEL: fromDiffConstsConvftoll
; P8BE-LABEL: fromDiffConstsConvftoll
; P8LE-LABEL: fromDiffConstsConvftoll
; P9BE: lxv v2
; P9BE: lxvx v2
; P9BE: blr
; P9LE: lxv v2
; P9LE: lxvx v2
; P9LE: blr
; P8BE: lxvd2x v2
; P8BE: blr
@ -4370,9 +4372,9 @@ entry:
; P9LE-LABEL: fromDiffConstsConvftoull
; P8BE-LABEL: fromDiffConstsConvftoull
; P8LE-LABEL: fromDiffConstsConvftoull
; P9BE: lxv v2
; P9BE: lxvx v2
; P9BE: blr
; P9LE: lxv v2
; P9LE: lxvx v2
; P9LE: blr
; P8BE: lxvd2x v2
; P8BE: blr

View File

@ -63,7 +63,7 @@ define <1 x i128> @v1i128_increment_by_one(<1 x i128> %a) nounwind {
; FIXME: li [[R1:r[0-9]+]], 1
; FIXME: li [[R2:r[0-9]+]], 0
; FIXME: mtvsrdd [[V1:v[0-9]+]], [[R2]], [[R1]]
; CHECK-P9: lxv [[V1:v[0-9]+]]
; CHECK-P9: lxvx [[V1:v[0-9]+]]
; CHECK-P9: vadduqm v2, v2, [[V1]]
; CHECK-P9: blr
@ -237,8 +237,8 @@ define <1 x i128> @call_v1i128_increment_by_val() nounwind {
; CHECK-LE: blr
; CHECK-P9-LABEL: @call_v1i128_increment_by_val
; CHECK-P9-DAG: lxv v2
; CHECK-P9-DAG: lxv v3
; CHECK-P9-DAG: lxvx v2
; CHECK-P9-DAG: lxvx v3
; CHECK-P9: bl v1i128_increment_by_val
; CHECK-P9: blr

View File

@ -33,11 +33,11 @@ entry:
; CHECK: stxvd2x [[REG5]]
; CHECK-P9-LABEL: @bar0
; CHECK-P9-DAG: lxv [[REG1:[0-9]+]]
; CHECK-P9-DAG: lxvx [[REG1:[0-9]+]]
; CHECK-P9-DAG: lfd [[REG2:[0-9]+]], 0(3)
; CHECK-P9: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
; CHECK-P9: xxpermdi [[REG5:[0-9]+]], [[REG1]], [[REG4]], 1
; CHECK-P9: stxv [[REG5]]
; CHECK-P9: stxvx [[REG5]]
define void @bar1() {
entry:
@ -56,9 +56,9 @@ entry:
; CHECK: stxvd2x [[REG5]]
; CHECK-P9-LABEL: @bar1
; CHECK-P9-DAG: lxv [[REG1:[0-9]+]]
; CHECK-P9-DAG: lxvx [[REG1:[0-9]+]]
; CHECK-P9-DAG: lfd [[REG2:[0-9]+]], 0(3)
; CHECK-P9: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
; CHECK-P9: xxmrgld [[REG5:[0-9]+]], [[REG4]], [[REG1]]
; CHECK-P9: stxv [[REG5]]
; CHECK-P9: stxvx [[REG5]]

View File

@ -36,8 +36,8 @@ entry:
%1 = load <16 x i8>, <16 x i8>* @ucb, align 16
%add.i = add <16 x i8> %1, %0
tail call void (...) @sink(<16 x i8> %add.i)
; CHECK: lxv 34, 0(3)
; CHECK: lxv 35, 0(4)
; CHECK: lxvx 34, 0, 3
; CHECK: lxvx 35, 0, 4
; CHECK: vaddubm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
@ -45,8 +45,8 @@ entry:
%3 = load <16 x i8>, <16 x i8>* @scb, align 16
%add.i22 = add <16 x i8> %3, %2
tail call void (...) @sink(<16 x i8> %add.i22)
; CHECK: lxv 34, 0(3)
; CHECK: lxv 35, 0(4)
; CHECK: lxvx 34, 0, 3
; CHECK: lxvx 35, 0, 4
; CHECK: vaddubm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
@ -54,8 +54,8 @@ entry:
%5 = load <8 x i16>, <8 x i16>* @usb, align 16
%add.i21 = add <8 x i16> %5, %4
tail call void (...) @sink(<8 x i16> %add.i21)
; CHECK: lxv 34, 0(3)
; CHECK: lxv 35, 0(4)
; CHECK: lxvx 34, 0, 3
; CHECK: lxvx 35, 0, 4
; CHECK: vadduhm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
@ -63,8 +63,8 @@ entry:
%7 = load <8 x i16>, <8 x i16>* @ssb, align 16
%add.i20 = add <8 x i16> %7, %6
tail call void (...) @sink(<8 x i16> %add.i20)
; CHECK: lxv 34, 0(3)
; CHECK: lxv 35, 0(4)
; CHECK: lxvx 34, 0, 3
; CHECK: lxvx 35, 0, 4
; CHECK: vadduhm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
@ -72,8 +72,8 @@ entry:
%9 = load <4 x i32>, <4 x i32>* @uib, align 16
%add.i19 = add <4 x i32> %9, %8
tail call void (...) @sink(<4 x i32> %add.i19)
; CHECK: lxv 34, 0(3)
; CHECK: lxv 35, 0(4)
; CHECK: lxvx 34, 0, 3
; CHECK: lxvx 35, 0, 4
; CHECK: vadduwm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
@ -81,8 +81,8 @@ entry:
%11 = load <4 x i32>, <4 x i32>* @sib, align 16
%add.i18 = add <4 x i32> %11, %10
tail call void (...) @sink(<4 x i32> %add.i18)
; CHECK: lxv 34, 0(3)
; CHECK: lxv 35, 0(4)
; CHECK: lxvx 34, 0, 3
; CHECK: lxvx 35, 0, 4
; CHECK: vadduwm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
@ -90,8 +90,8 @@ entry:
%13 = load <2 x i64>, <2 x i64>* @ullb, align 16
%add.i17 = add <2 x i64> %13, %12
tail call void (...) @sink(<2 x i64> %add.i17)
; CHECK: lxv 34, 0(3)
; CHECK: lxv 35, 0(4)
; CHECK: lxvx 34, 0, 3
; CHECK: lxvx 35, 0, 4
; CHECK: vaddudm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
@ -99,8 +99,8 @@ entry:
%15 = load <2 x i64>, <2 x i64>* @sllb, align 16
%add.i16 = add <2 x i64> %15, %14
tail call void (...) @sink(<2 x i64> %add.i16)
; CHECK: lxv 34, 0(3)
; CHECK: lxv 35, 0(4)
; CHECK: lxvx 34, 0, 3
; CHECK: lxvx 35, 0, 4
; CHECK: vaddudm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
@ -108,8 +108,8 @@ entry:
%17 = load <1 x i128>, <1 x i128>* @uxb, align 16
%add.i15 = add <1 x i128> %17, %16
tail call void (...) @sink(<1 x i128> %add.i15)
; CHECK: lxv 34, 0(3)
; CHECK: lxv 35, 0(4)
; CHECK: lxvx 34, 0, 3
; CHECK: lxvx 35, 0, 4
; CHECK: vadduqm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
@ -117,8 +117,8 @@ entry:
%19 = load <1 x i128>, <1 x i128>* @sxb, align 16
%add.i14 = add <1 x i128> %19, %18
tail call void (...) @sink(<1 x i128> %add.i14)
; CHECK: lxv 34, 0(3)
; CHECK: lxv 35, 0(4)
; CHECK: lxvx 34, 0, 3
; CHECK: lxvx 35, 0, 4
; CHECK: vadduqm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
@ -126,8 +126,8 @@ entry:
%21 = load <4 x float>, <4 x float>* @vfb, align 16
%add.i13 = fadd <4 x float> %20, %21
tail call void (...) @sink(<4 x float> %add.i13)
; CHECK: lxv 0, 0(3)
; CHECK: lxv 1, 0(4)
; CHECK: lxvx 0, 0, 3
; CHECK: lxvx 1, 0, 4
; CHECK: xvaddsp 34, 0, 1
; CHECK: stxv 34,
; CHECK: bl sink
@ -135,8 +135,8 @@ entry:
%23 = load <2 x double>, <2 x double>* @vdb, align 16
%add.i12 = fadd <2 x double> %22, %23
tail call void (...) @sink(<2 x double> %add.i12)
; CHECK: lxv 0, 0(3)
; CHECK: lxv 1, 0(4)
; CHECK: lxvx 0, 0, 3
; CHECK: lxvx 1, 0, 4
; CHECK: xvadddp 0, 0, 1
; CHECK: stxv 0,
; CHECK: bl sink