Add codegen patterns for VST1-lane instructions. Radar 8599955.

llvm-svn: 118176
This commit is contained in:
Bob Wilson 2010-11-03 16:24:53 +00:00
parent ceb49296ef
commit 7d0ac84abd
4 changed files with 83 additions and 13 deletions

View File

@ -1126,28 +1126,37 @@ class VSTQQQQLNWBPseudo<InstrItinClass itin>
nohash_imm:$lane), itin, "$addr.addr = $wb">; nohash_imm:$lane), itin, "$addr.addr = $wb">;
// VST1LN : Vector Store (single element from one lane) // VST1LN : Vector Store (single element from one lane)
class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt> class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
PatFrag StoreOp, SDNode ExtractOp>
: NLdStLn<1, 0b00, op11_8, op7_4, (outs), : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$Rn, DPR:$Vd, nohash_imm:$lane), (ins addrmode6:$Rn, DPR:$Vd, nohash_imm:$lane),
IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", []> { IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
[(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6:$Rn)]> {
let Rm = 0b1111; let Rm = 0b1111;
} }
class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
: VSTQLNPseudo<IIC_VST1ln> {
let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
addrmode6:$addr)];
}
def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8"> { def VST1LNd8 : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
NEONvgetlaneu> {
let Inst{7-5} = lane{2-0}; let Inst{7-5} = lane{2-0};
} }
def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16"> { def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
NEONvgetlaneu> {
let Inst{7-6} = lane{1-0}; let Inst{7-6} = lane{1-0};
let Inst{4} = Rn{5}; let Inst{4} = Rn{5};
} }
def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32"> { def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt> {
let Inst{7} = lane{0}; let Inst{7} = lane{0};
let Inst{5-4} = Rn{5-4}; let Inst{5-4} = Rn{5-4};
} }
def VST1LNq8Pseudo : VSTQLNPseudo<IIC_VST1ln>; def VST1LNq8Pseudo : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
def VST1LNq16Pseudo : VSTQLNPseudo<IIC_VST1ln>; def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
def VST1LNq32Pseudo : VSTQLNPseudo<IIC_VST1ln>; def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {

View File

@ -102,7 +102,8 @@ entry:
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
%0 = load <4 x i16>* %arg0_uint16x4_t, align 8 ; <<4 x i16>> [#uses=1] %0 = load <4 x i16>* %arg0_uint16x4_t, align 8 ; <<4 x i16>> [#uses=1]
%1 = extractelement <4 x i16> %0, i32 1 ; <i16> [#uses=1] %1 = extractelement <4 x i16> %0, i32 1 ; <i16> [#uses=1]
store i16 %1, i16* %out_uint16_t, align 2 %2 = add i16 %1, %1
store i16 %2, i16* %out_uint16_t, align 2
br label %return br label %return
return: ; preds = %entry return: ; preds = %entry
@ -117,7 +118,8 @@ entry:
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
%0 = load <8 x i8>* %arg0_uint8x8_t, align 8 ; <<8 x i8>> [#uses=1] %0 = load <8 x i8>* %arg0_uint8x8_t, align 8 ; <<8 x i8>> [#uses=1]
%1 = extractelement <8 x i8> %0, i32 1 ; <i8> [#uses=1] %1 = extractelement <8 x i8> %0, i32 1 ; <i8> [#uses=1]
store i8 %1, i8* %out_uint8_t, align 1 %2 = add i8 %1, %1
store i8 %2, i8* %out_uint8_t, align 1
br label %return br label %return
return: ; preds = %entry return: ; preds = %entry
@ -132,7 +134,8 @@ entry:
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
%0 = load <8 x i16>* %arg0_uint16x8_t, align 16 ; <<8 x i16>> [#uses=1] %0 = load <8 x i16>* %arg0_uint16x8_t, align 16 ; <<8 x i16>> [#uses=1]
%1 = extractelement <8 x i16> %0, i32 1 ; <i16> [#uses=1] %1 = extractelement <8 x i16> %0, i32 1 ; <i16> [#uses=1]
store i16 %1, i16* %out_uint16_t, align 2 %2 = add i16 %1, %1
store i16 %2, i16* %out_uint16_t, align 2
br label %return br label %return
return: ; preds = %entry return: ; preds = %entry
@ -147,7 +150,8 @@ entry:
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0] %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
%0 = load <16 x i8>* %arg0_uint8x16_t, align 16 ; <<16 x i8>> [#uses=1] %0 = load <16 x i8>* %arg0_uint8x16_t, align 16 ; <<16 x i8>> [#uses=1]
%1 = extractelement <16 x i8> %0, i32 1 ; <i8> [#uses=1] %1 = extractelement <16 x i8> %0, i32 1 ; <i8> [#uses=1]
store i8 %1, i8* %out_uint8_t, align 1 %2 = add i8 %1, %1
store i8 %2, i8* %out_uint8_t, align 1
br label %return br label %return
return: ; preds = %entry return: ; preds = %entry

View File

@ -22,7 +22,7 @@ define <4 x i16> @vld1lanei16(i16* %A, <4 x i16>* %B) nounwind {
define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind { define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vld1lanei32: ;CHECK: vld1lanei32:
;Check the alignment value. Max for this instruction is 16 bits: ;Check the alignment value. Max for this instruction is 32 bits:
;CHECK: vld1.32 {d16[1]}, [r0, :32] ;CHECK: vld1.32 {d16[1]}, [r0, :32]
%tmp1 = load <2 x i32>* %B %tmp1 = load <2 x i32>* %B
%tmp2 = load i32* %A, align 8 %tmp2 = load i32* %A, align 8

View File

@ -1,5 +1,62 @@
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
define void @vst1lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst1lanei8:
;Storing a single i8 extracted from lane 3 of a 64-bit vector should select
;one vst1.8 lane store instead of a lane move followed by a scalar store.
;Check the (default) alignment: the store below is marked align 8, yet the
;expected address operand carries no alignment qualifier.
;CHECK: vst1.8 {d16[3]}, [r0]
%tmp1 = load <8 x i8>* %B
%tmp2 = extractelement <8 x i8> %tmp1, i32 3
store i8 %tmp2, i8* %A, align 8
ret void
}
define void @vst1lanei16(i16* %A, <4 x i16>* %B) nounwind {
;CHECK: vst1lanei16:
;Storing a single i16 extracted from lane 2 of a 64-bit vector should select
;one vst1.16 lane store.
;Check the alignment value. Max for this instruction is 16 bits, so the
;align 8 (bytes) on the store below is expected to be printed as :16.
;CHECK: vst1.16 {d16[2]}, [r0, :16]
%tmp1 = load <4 x i16>* %B
%tmp2 = extractelement <4 x i16> %tmp1, i32 2
store i16 %tmp2, i16* %A, align 8
ret void
}
define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind {
;CHECK: vst1lanei32:
;Storing a single i32 extracted from lane 1 of a 64-bit vector should select
;one vst1.32 lane store.
;Check the alignment value. Max for this instruction is 32 bits, so the
;align 8 (bytes) on the store below is expected to be printed as :32.
;CHECK: vst1.32 {d16[1]}, [r0, :32]
%tmp1 = load <2 x i32>* %B
%tmp2 = extractelement <2 x i32> %tmp1, i32 1
store i32 %tmp2, i32* %A, align 8
ret void
}
define void @vst1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
;CHECK: vst1laneQi8:
;128-bit source vector: element 9 lies in the upper 64-bit half, so the
;expected store names the second D register (d17) with lane 9 - 8 = 1.
;As in the 64-bit i8 test, no alignment qualifier is expected.
;CHECK: vst1.8 {d17[1]}, [r0]
%tmp1 = load <16 x i8>* %B
%tmp2 = extractelement <16 x i8> %tmp1, i32 9
store i8 %tmp2, i8* %A, align 8
ret void
}
define void @vst1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
;CHECK: vst1laneQi16:
;128-bit source vector: element 5 lies in the upper 64-bit half, so the
;expected store names the second D register (d17) with lane 5 - 4 = 1.
;The align 8 (bytes) on the store is expected to be printed as :16.
;CHECK: vst1.16 {d17[1]}, [r0, :16]
%tmp1 = load <8 x i16>* %B
%tmp2 = extractelement <8 x i16> %tmp1, i32 5
store i16 %tmp2, i16* %A, align 8
ret void
}
define void @vst1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
;CHECK: vst1laneQi32:
;128-bit source vector: element 3 lies in the upper 64-bit half, so the
;expected store names the second D register (d17) with lane 3 - 2 = 1.
;The align 8 (bytes) on the store is expected to be printed as :32.
;CHECK: vst1.32 {d17[1]}, [r0, :32]
%tmp1 = load <4 x i32>* %B
%tmp2 = extractelement <4 x i32> %tmp1, i32 3
store i32 %tmp2, i32* %A, align 8
ret void
}
define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind { define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind {
;CHECK: vst2lanei8: ;CHECK: vst2lanei8:
;Check the alignment value. Max for this instruction is 16 bits: ;Check the alignment value. Max for this instruction is 16 bits: