Support alignment specifier for NEON vld/vst instructions

llvm-svn: 86404
This commit is contained in:
Jim Grosbach 2009-11-07 21:25:39 +00:00
parent b9397262b7
commit d1d002a6fe
5 changed files with 61 additions and 46 deletions

View File

@ -541,13 +541,15 @@ namespace ARM_AM {
//
// This is used for NEON load / store instructions.
//
// addrmode6 := reg with optional writeback
// addrmode6 := reg with optional writeback and alignment
//
// This is stored in three operands [regaddr, regupdate, opc]. The first is
// the address register. The second register holds the value of a post-access
// increment for writeback or reg0 if no writeback or if the writeback
// increment is the size of the memory access. The third operand encodes
// whether there is writeback to the address register.
// This is stored in four operands [regaddr, regupdate, opc, align]. The
// first is the address register. The second register holds the value of
// a post-access increment for writeback or reg0 if no writeback or if the
// writeback increment is the size of the memory access. The third
// operand encodes whether there is writeback to the address register. The
// fourth operand is the value of the alignment specifier to use or zero if
// no explicit alignment.
static inline unsigned getAM6Opc(bool WB = false) {
return (int)WB;

View File

@ -81,7 +81,7 @@ public:
bool SelectAddrMode5(SDValue Op, SDValue N, SDValue &Base,
SDValue &Offset);
bool SelectAddrMode6(SDValue Op, SDValue N, SDValue &Addr, SDValue &Update,
SDValue &Opc);
SDValue &Opc, SDValue &Align);
bool SelectAddrModePC(SDValue Op, SDValue N, SDValue &Offset,
SDValue &Label);
@ -489,11 +489,13 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N,
bool ARMDAGToDAGISel::SelectAddrMode6(SDValue Op, SDValue N,
SDValue &Addr, SDValue &Update,
SDValue &Opc) {
SDValue &Opc, SDValue &Align) {
Addr = N;
// Default to no writeback.
Update = CurDAG->getRegister(0, MVT::i32);
Opc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(false), MVT::i32);
// Default to no alignment.
Align = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
@ -1008,8 +1010,8 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
SDNode *N = Op.getNode();
DebugLoc dl = N->getDebugLoc();
SDValue MemAddr, MemUpdate, MemOpc;
if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
SDValue MemAddr, MemUpdate, MemOpc, Align;
if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
return NULL;
SDValue Chain = N->getOperand(0);
@ -1034,10 +1036,10 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
if (is64BitVector) {
unsigned Opc = DOpcodes[OpcodeIndex];
const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain };
const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, Chain };
std::vector<EVT> ResTys(NumVecs, VT);
ResTys.push_back(MVT::Other);
return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 4);
return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
}
EVT RegVT = GetNEONSubregVT(VT);
@ -1045,10 +1047,10 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
// Quad registers are directly supported for VLD2,
// loading 2 pairs of D regs.
unsigned Opc = QOpcodes0[OpcodeIndex];
const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain };
const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, Chain };
std::vector<EVT> ResTys(4, VT);
ResTys.push_back(MVT::Other);
SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 4);
SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
Chain = SDValue(VLd, 4);
// Combine the even and odd subregs to produce the result.
@ -1069,14 +1071,15 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
// Load the even subregs.
unsigned Opc = QOpcodes0[OpcodeIndex];
const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Chain };
SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 4);
const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Align, Chain };
SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 5);
Chain = SDValue(VLdA, NumVecs+1);
// Load the odd subregs.
Opc = QOpcodes1[OpcodeIndex];
const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc, Chain };
SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 4);
const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc,
Align, Chain };
SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 5);
Chain = SDValue(VLdB, NumVecs+1);
// Combine the even and odd subregs to produce the result.
@ -1096,8 +1099,8 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
SDNode *N = Op.getNode();
DebugLoc dl = N->getDebugLoc();
SDValue MemAddr, MemUpdate, MemOpc;
if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
SDValue MemAddr, MemUpdate, MemOpc, Align;
if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
return NULL;
SDValue Chain = N->getOperand(0);
@ -1124,13 +1127,14 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
Ops.push_back(MemAddr);
Ops.push_back(MemUpdate);
Ops.push_back(MemOpc);
Ops.push_back(Align);
if (is64BitVector) {
unsigned Opc = DOpcodes[OpcodeIndex];
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
Ops.push_back(N->getOperand(Vec+3));
Ops.push_back(Chain);
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+4);
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5);
}
EVT RegVT = GetNEONSubregVT(VT);
@ -1145,7 +1149,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
N->getOperand(Vec+3)));
}
Ops.push_back(Chain);
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 8);
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 9);
}
// Otherwise, quad registers are stored with two separate instructions,
@ -1161,18 +1165,18 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
Ops.push_back(Chain);
unsigned Opc = QOpcodes0[OpcodeIndex];
SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
MVT::Other, Ops.data(), NumVecs+4);
MVT::Other, Ops.data(), NumVecs+5);
Chain = SDValue(VStA, 1);
// Store the odd subregs.
Ops[0] = SDValue(VStA, 0); // MemAddr
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
Ops[Vec+4] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
N->getOperand(Vec+3));
Ops[NumVecs+3] = Chain;
Ops[NumVecs+4] = Chain;
Opc = QOpcodes1[OpcodeIndex];
SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
MVT::Other, Ops.data(), NumVecs+4);
MVT::Other, Ops.data(), NumVecs+5);
Chain = SDValue(VStB, 1);
ReplaceUses(SDValue(N, 0), Chain);
return NULL;
@ -1186,8 +1190,8 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad,
SDNode *N = Op.getNode();
DebugLoc dl = N->getDebugLoc();
SDValue MemAddr, MemUpdate, MemOpc;
if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
SDValue MemAddr, MemUpdate, MemOpc, Align;
if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
return NULL;
SDValue Chain = N->getOperand(0);
@ -1224,6 +1228,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad,
Ops.push_back(MemAddr);
Ops.push_back(MemUpdate);
Ops.push_back(MemOpc);
Ops.push_back(Align);
unsigned Opc = 0;
if (is64BitVector) {

View File

@ -340,9 +340,9 @@ def addrmode5 : Operand<i32>,
// addrmode6 := reg with optional writeback
//
def addrmode6 : Operand<i32>,
ComplexPattern<i32, 3, "SelectAddrMode6", []> {
ComplexPattern<i32, 4, "SelectAddrMode6", []> {
let PrintMethod = "printAddrMode6Operand";
let MIOperandInfo = (ops GPR:$addr, GPR:$upd, i32imm);
let MIOperandInfo = (ops GPR:$addr, GPR:$upd, i32imm, i32imm);
}
// addrmodepc := pc + reg

View File

@ -638,9 +638,17 @@ void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op) {
const MachineOperand &MO1 = MI->getOperand(Op);
const MachineOperand &MO2 = MI->getOperand(Op+1);
const MachineOperand &MO3 = MI->getOperand(Op+2);
const MachineOperand &MO4 = MI->getOperand(Op+3);
// FIXME: No support yet for specifying alignment.
O << "[" << getRegisterName(MO1.getReg()) << "]";
O << "[" << getRegisterName(MO1.getReg());
if (MO4.getImm()) {
if (Subtarget->isTargetDarwin())
O << ", :";
else
O << " @";
O << MO4.getImm();
}
O << "]";
if (ARM_AM::getAM6WBFlag(MO3.getImm())) {
if (MO2.getReg() == 0)

View File

@ -177,20 +177,20 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST2LNd8:
case ARM::VST2LNd16:
case ARM::VST2LNd32:
FirstOpnd = 3;
FirstOpnd = 4;
NumRegs = 2;
return true;
case ARM::VST2q8:
case ARM::VST2q16:
case ARM::VST2q32:
FirstOpnd = 3;
FirstOpnd = 4;
NumRegs = 4;
return true;
case ARM::VST2LNq16a:
case ARM::VST2LNq32a:
FirstOpnd = 3;
FirstOpnd = 4;
NumRegs = 2;
Offset = 0;
Stride = 2;
@ -198,7 +198,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST2LNq16b:
case ARM::VST2LNq32b:
FirstOpnd = 3;
FirstOpnd = 4;
NumRegs = 2;
Offset = 1;
Stride = 2;
@ -211,14 +211,14 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST3LNd8:
case ARM::VST3LNd16:
case ARM::VST3LNd32:
FirstOpnd = 3;
FirstOpnd = 4;
NumRegs = 3;
return true;
case ARM::VST3q8a:
case ARM::VST3q16a:
case ARM::VST3q32a:
FirstOpnd = 4;
FirstOpnd = 5;
NumRegs = 3;
Offset = 0;
Stride = 2;
@ -227,7 +227,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST3q8b:
case ARM::VST3q16b:
case ARM::VST3q32b:
FirstOpnd = 4;
FirstOpnd = 5;
NumRegs = 3;
Offset = 1;
Stride = 2;
@ -235,7 +235,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST3LNq16a:
case ARM::VST3LNq32a:
FirstOpnd = 3;
FirstOpnd = 4;
NumRegs = 3;
Offset = 0;
Stride = 2;
@ -243,7 +243,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST3LNq16b:
case ARM::VST3LNq32b:
FirstOpnd = 3;
FirstOpnd = 4;
NumRegs = 3;
Offset = 1;
Stride = 2;
@ -256,14 +256,14 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST4LNd8:
case ARM::VST4LNd16:
case ARM::VST4LNd32:
FirstOpnd = 3;
FirstOpnd = 4;
NumRegs = 4;
return true;
case ARM::VST4q8a:
case ARM::VST4q16a:
case ARM::VST4q32a:
FirstOpnd = 4;
FirstOpnd = 5;
NumRegs = 4;
Offset = 0;
Stride = 2;
@ -272,7 +272,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST4q8b:
case ARM::VST4q16b:
case ARM::VST4q32b:
FirstOpnd = 4;
FirstOpnd = 5;
NumRegs = 4;
Offset = 1;
Stride = 2;
@ -280,7 +280,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST4LNq16a:
case ARM::VST4LNq32a:
FirstOpnd = 3;
FirstOpnd = 4;
NumRegs = 4;
Offset = 0;
Stride = 2;
@ -288,7 +288,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST4LNq16b:
case ARM::VST4LNq32b:
FirstOpnd = 3;
FirstOpnd = 4;
NumRegs = 4;
Offset = 1;
Stride = 2;