From 0747307049b358495d650de64d437c8fcdaf4f40 Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Wed, 3 Nov 2010 22:44:51 +0000 Subject: [PATCH] Add support for code generation of the one register with immediate form of vorr. We could be more aggressive about making this work for a larger range of constants, but this seems like a good start. llvm-svn: 118201 --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 29 +++++++++++++++++ llvm/lib/Target/ARM/ARMISelLowering.h | 5 ++- llvm/lib/Target/ARM/ARMInstrNEON.td | 41 ++++++++++++++++++++++++ llvm/test/CodeGen/ARM/vbits.ll | 20 ++++++++++++ llvm/test/MC/ARM/neon-bitwise-encoding.s | 5 +++ 5 files changed, 99 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 2e4fa32b7fad..7a3a747f50c2 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -101,6 +101,7 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom); setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom); setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom); + setOperationAction(ISD::OR, VT.getSimpleVT(), Custom); setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand); setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand); for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; @@ -820,6 +821,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::FMAX: return "ARMISD::FMAX"; case ARMISD::FMIN: return "ARMISD::FMIN"; case ARMISD::BFI: return "ARMISD::BFI"; + case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; } } @@ -3431,6 +3433,32 @@ static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, return SDValue(); } +static SDValue LowerOR(SDValue Op, SelectionDAG &DAG) { + SDValue Op1 = Op.getOperand(1); + while (Op1.getOpcode() == ISD::BIT_CONVERT && Op1.getOperand(0) != Op1) + Op1 = Op1.getOperand(0); + if (Op1.getOpcode() != ARMISD::VMOVIMM) return Op; + + ConstantSDNode* TargetConstant = cast(Op1.getOperand(0)); + uint32_t ConstVal = TargetConstant->getZExtValue(); + + // FIXME: VORRIMM only supports immediate encodings of 16 and 32 bit size. + // In theory for VMOVIMMs whose value is already encoded as with an + // 8 bit encoding, we could re-encode it as a 16 or 32 bit immediate. + EVT VorrVT = Op1.getValueType(); + EVT EltVT = VorrVT.getVectorElementType(); + if (EltVT != MVT::i16 && EltVT != MVT::i32) return Op; + + ConstVal |= 0x0100; + SDValue OrConst = DAG.getTargetConstant(ConstVal, MVT::i32); + + DebugLoc dl = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + SDValue toTy = DAG.getNode(ISD::BIT_CONVERT, dl, VorrVT, Op.getOperand(0)); + SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, toTy, OrConst); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vorr); +} + // If this is a case we can't handle, return null and let the default // expansion code take care of it. static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, @@ -3899,6 +3927,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); + case ISD::OR: return LowerOR(Op, DAG); } return SDValue(); } diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 8504b83d2432..4a7dec21583e 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -162,7 +162,10 @@ namespace llvm { FMIN, // Bit-field insert - BFI + BFI, + + // Vector OR with immediate + VORRIMM }; } diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td index cc9fc1923cbf..29a1f2c360a0 100644 --- a/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -69,6 +69,10 @@ def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>; def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>; +def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>]>; +def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>; + def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>; // VDUPLANE can produce a quad-register result from a double-register source, @@ -3295,6 +3299,43 @@ def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr", def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr", v4i32, v4i32, or, 1>; +def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1, + (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src), + IIC_VMOVImm, + "vorr", "i16", "$Vd, $SIMM", "$src = $Vd", + [(set DPR:$Vd, + (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> { + let Inst{9} = SIMM{9}; +} + +def VORRiv2i32 : N1ModImm<1, 0b000, {?,?,?,1}, 0, 0, 0, 1, + (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src), + IIC_VMOVImm, + "vorr", "i32", "$Vd, $SIMM", "$src = $Vd", + [(set DPR:$Vd, + (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> { + let Inst{11-9} = SIMM{11-9}; +} + +def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1, + (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src), + IIC_VMOVImm, + "vorr", "i16", "$Vd, $SIMM", "$src = $Vd", + [(set QPR:$Vd, + (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> { + let Inst{9} = SIMM{9}; +} + +def VORRiv4i32 : N1ModImm<1, 0b000, {?,?,?,1}, 0, 1, 0, 1, + (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src), + IIC_VMOVImm, + "vorr", "i32", "$Vd, $SIMM", "$src = $Vd", + [(set QPR:$Vd, + (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> { + let Inst{11-9} = SIMM{11-9}; +} + + // VBIC : Vector Bitwise Bit Clear (AND NOT) def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, IIC_VBINiD, diff --git a/llvm/test/CodeGen/ARM/vbits.ll b/llvm/test/CodeGen/ARM/vbits.ll index 293d22938a76..352608b0d422 100644 --- a/llvm/test/CodeGen/ARM/vbits.ll +++ b/llvm/test/CodeGen/ARM/vbits.ll @@ -505,3 +505,23 @@ define <4 x i32> @vtstQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind { %tmp5 = sext <4 x i1> %tmp4 to <4 x i32> ret <4 x i32> %tmp5 } + +define <8 x i8> @v_orrimm(<8 x i8>* %A) nounwind { +; CHECK: v_orrimm: +; CHECK-NOT: vmov +; CHECK-NOT: vmvn +; CHECK: vorr + %tmp1 = load <8 x i8>* %A + %tmp3 = or <8 x i8> %tmp1, + ret <8 x i8> %tmp3 +} + +define <16 x i8> @v_orrimmQ(<16 x i8>* %A) nounwind { +; CHECK: v_orrimmQ +; CHECK-NOT: vmov +; CHECK-NOT: vmvn +; CHECK: vorr + %tmp1 = load <16 x i8>* %A + %tmp3 = or <16 x i8> %tmp1, + ret <16 x i8> %tmp3 +} diff --git a/llvm/test/MC/ARM/neon-bitwise-encoding.s b/llvm/test/MC/ARM/neon-bitwise-encoding.s index 13fe53cd1413..345b6bb2d268 100644 --- a/llvm/test/MC/ARM/neon-bitwise-encoding.s +++ b/llvm/test/MC/ARM/neon-bitwise-encoding.s @@ -1,4 +1,5 @@ @ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unkown -show-encoding < %s | FileCheck %s +@ XFAIL: * @ CHECK: vand d16, d17, d16 @ encoding: [0xb0,0x01,0x41,0xf2] vand d16, d17, d16 @@ -14,6 +15,10 @@ vorr d16, d17, d16 @ CHECK: vorr q8, q8, q9 @ encoding: [0xf2,0x01,0x60,0xf2] vorr q8, q8, q9 +@ CHECK: vorr.i32 d16, #0x1000000 @ encoding: [0x11,0x07,0xc0,0xf2] + vorr.i32 d16, #0x1000000 +@ CHECK: vorr.i32 q8, #0x1000000 @ encoding: [0x51,0x07,0xc0,0xf2] + vorr.i32 q8, #0x1000000 @ CHECK: vbic d16, d17, d16 @ encoding: [0xb0,0x01,0x51,0xf2] vbic d16, d17, d16