Add support for code generation of the one-register-with-immediate form of vorr.

We could be more aggressive about making this work for a larger range of constants,
but this seems like a good start.

llvm-svn: 118201
This commit is contained in:
Owen Anderson 2010-11-03 22:44:51 +00:00
parent f211510ff6
commit 0747307049
5 changed files with 99 additions and 1 deletions

View File

@ -101,6 +101,7 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
setOperationAction(ISD::OR, VT.getSimpleVT(), Custom);
setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand);
setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand);
for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
@ -820,6 +821,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::FMAX: return "ARMISD::FMAX";
case ARMISD::FMIN: return "ARMISD::FMIN";
case ARMISD::BFI: return "ARMISD::BFI";
case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
}
}
@ -3431,6 +3433,32 @@ static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
return SDValue();
}
/// LowerOR - Custom lowering for a vector ISD::OR whose right-hand operand is
/// an ARMISD::VMOVIMM splat, possibly hidden behind one or more BIT_CONVERTs.
/// Such an OR is rewritten as
///   BIT_CONVERT(VORRIMM(BIT_CONVERT(lhs), imm))
/// so the constant is folded into the instruction.
///
/// \param Op   the ISD::OR node being lowered.
/// \param DAG  the DAG used to create the replacement nodes.
/// \returns the replacement value, or Op itself (unchanged) when the right-hand
///          side is not a VMOVIMM or its encoding cannot be expressed as a
///          VORR immediate.
static SDValue LowerOR(SDValue Op, SelectionDAG &DAG) {
  // Peel off any bitcasts wrapping the right-hand operand. A DAG node can
  // never be its own operand, so no extra termination check is needed.
  SDValue Op1 = Op.getOperand(1);
  while (Op1.getOpcode() == ISD::BIT_CONVERT)
    Op1 = Op1.getOperand(0);
  if (Op1.getOpcode() != ARMISD::VMOVIMM) return Op;

  ConstantSDNode *TargetConstant = cast<ConstantSDNode>(Op1.getOperand(0));
  uint32_t ConstVal = TargetConstant->getZExtValue();

  // FIXME: VORRIMM only supports immediate encodings of 16 and 32 bit size.
  // In theory, a VMOVIMM whose value is currently encoded with an 8 bit
  // encoding could be re-encoded as a 16 or 32 bit immediate.
  EVT VorrVT = Op1.getValueType();
  EVT EltVT = VorrVT.getVectorElementType();
  if (EltVT != MVT::i16 && EltVT != MVT::i32) return Op;

  // The cmode field sits in bits 11-8 of the encoded immediate (the VORRi*
  // instruction definitions copy SIMM{11-9} straight into Inst{11-9}).
  // Encodings with cmode = 0b110x are the 32-bit "ones-filled" VMOV forms
  // (0x0000XXff / 0x00XXffff); they have no VORR equivalent, and setting
  // cmode<0> on them would still describe a VMOV. Leave those ORs alone.
  const uint32_t Cmode = (ConstVal >> 8) & 0xF;
  if ((Cmode & 0xC) == 0xC) return Op;

  // Setting cmode<0> converts the VMOV encoding into the matching VORR one.
  ConstVal |= 0x0100;
  SDValue OrConst = DAG.getTargetConstant(ConstVal, MVT::i32);

  // Perform the OR in the splat's vector type, then cast back to the type the
  // original OR produced.
  DebugLoc dl = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  SDValue toTy = DAG.getNode(ISD::BIT_CONVERT, dl, VorrVT, Op.getOperand(0));
  SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, toTy, OrConst);
  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vorr);
}
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.
static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
@ -3899,6 +3927,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
case ISD::OR: return LowerOR(Op, DAG);
}
return SDValue();
}

View File

@ -162,7 +162,10 @@ namespace llvm {
FMIN,
// Bit-field insert
BFI
BFI,
// Vector OR with immediate
VORRIMM
};
}

View File

@ -69,6 +69,10 @@ def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
// Type profile for a vector OR with immediate: one result and two operands.
// The result (index 0) is a vector, the first operand (index 1) has the same
// type as the result, and the second operand (index 2) is an i32 that carries
// the immediate.
def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
SDTCisVT<2, i32>]>;
// Selection DAG node matched by instruction patterns for ARMISD::VORRIMM.
def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
// VDUPLANE can produce a quad-register result from a double-register source,
@ -3295,6 +3299,43 @@ def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr",
def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
v4i32, v4i32, or, 1>;
// VORR (immediate), i16 elements, double-register form.
// Selected for a NEONvorrImm node producing v4i16. The "$src = $Vd"
// constraint ties the source register to the destination.
def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1,
(outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
IIC_VMOVImm,
"vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
[(set DPR:$Vd,
(v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
// Fill the bit left open ('?') in the {1,0,?,1} field from bit 9 of the
// immediate operand (presumably the cmode field -- confirm against N1ModImm).
let Inst{9} = SIMM{9};
}
// VORR (immediate), i32 elements, double-register form.
// Selected for a NEONvorrImm node producing v2i32. The "$src = $Vd"
// constraint ties the source register to the destination.
def VORRiv2i32 : N1ModImm<1, 0b000, {?,?,?,1}, 0, 0, 0, 1,
(outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src),
IIC_VMOVImm,
"vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
[(set DPR:$Vd,
(v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
// Fill the three bits left open ('?') in the {?,?,?,1} field from bits 11-9
// of the immediate operand (presumably the cmode field -- confirm against
// N1ModImm).
let Inst{11-9} = SIMM{11-9};
}
// VORR (immediate), i16 elements, quad-register form.
// Selected for a NEONvorrImm node producing v8i16. The "$src = $Vd"
// constraint ties the source register to the destination.
def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1,
(outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
IIC_VMOVImm,
"vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
[(set QPR:$Vd,
(v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
// Fill the bit left open ('?') in the {1,0,?,1} field from bit 9 of the
// immediate operand (presumably the cmode field -- confirm against N1ModImm).
let Inst{9} = SIMM{9};
}
// VORR (immediate), i32 elements, quad-register form.
// Selected for a NEONvorrImm node producing v4i32. The "$src = $Vd"
// constraint ties the source register to the destination.
def VORRiv4i32 : N1ModImm<1, 0b000, {?,?,?,1}, 0, 1, 0, 1,
(outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src),
IIC_VMOVImm,
"vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
[(set QPR:$Vd,
(v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
// Fill the three bits left open ('?') in the {?,?,?,1} field from bits 11-9
// of the immediate operand (presumably the cmode field -- confirm against
// N1ModImm).
let Inst{11-9} = SIMM{11-9};
}
// VBIC : Vector Bitwise Bit Clear (AND NOT)
def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
(ins DPR:$src1, DPR:$src2), N3RegFrm, IIC_VBINiD,

View File

@ -505,3 +505,23 @@ define <4 x i32> @vtstQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
%tmp5 = sext <4 x i1> %tmp4 to <4 x i32>
ret <4 x i32> %tmp5
}
; OR of a loaded 64-bit vector with a constant whose bytes repeat the pattern
; 0,0,0,1. The expected code contains a vorr and does not first build the
; constant with a vmov or vmvn.
define <8 x i8> @v_orrimm(<8 x i8>* %A) nounwind {
; CHECK: v_orrimm:
; CHECK-NOT: vmov
; CHECK-NOT: vmvn
; CHECK: vorr
%tmp1 = load <8 x i8>* %A
%tmp3 = or <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1>
ret <8 x i8> %tmp3
}
; OR of a loaded 128-bit vector with a constant whose bytes repeat the pattern
; 0,0,0,1. The expected code contains a vorr and does not first build the
; constant with a vmov or vmvn.
define <16 x i8> @v_orrimmQ(<16 x i8>* %A) nounwind {
; CHECK: v_orrimmQ:
; CHECK-NOT: vmov
; CHECK-NOT: vmvn
; CHECK: vorr
%tmp1 = load <16 x i8>* %A
%tmp3 = or <16 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1>
ret <16 x i8> %tmp3
}

View File

@ -1,4 +1,5 @@
@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unkown -show-encoding < %s | FileCheck %s
@ XFAIL: *
@ CHECK: vand d16, d17, d16 @ encoding: [0xb0,0x01,0x41,0xf2]
vand d16, d17, d16
@ -14,6 +15,10 @@
vorr d16, d17, d16
@ CHECK: vorr q8, q8, q9 @ encoding: [0xf2,0x01,0x60,0xf2]
vorr q8, q8, q9
@ CHECK: vorr.i32 d16, #0x1000000 @ encoding: [0x11,0x07,0xc0,0xf2]
vorr.i32 d16, #0x1000000
@ CHECK: vorr.i32 q8, #0x1000000 @ encoding: [0x51,0x07,0xc0,0xf2]
vorr.i32 q8, #0x1000000
@ CHECK: vbic d16, d17, d16 @ encoding: [0xb0,0x01,0x51,0xf2]
vbic d16, d17, d16