ARM64: add patterns for more lane-wise ld1/st1 operations.

llvm-svn: 205294
This commit is contained in:
Tim Northover 2014-04-01 10:37:09 +00:00
parent d8d613b979
commit ff179ba3d3
4 changed files with 268 additions and 139 deletions

View File

@ -7971,8 +7971,7 @@ multiclass SIMDLdSingleSTied<bit R, bits<3> opcode, bits<2> size,string asm,
}
let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
multiclass SIMDLdSingleDTied<bit R, bits<3> opcode, bits<2> size, string asm,
RegisterOperand listtype,
RegisterOperand GPR64pi> {
RegisterOperand listtype, RegisterOperand GPR64pi> {
def i64 : SIMDLdStSingleDTied<1, R, opcode, size, asm,
(outs listtype:$dst),
(ins listtype:$Vt, VectorIndexD:$idx,
@ -7985,12 +7984,10 @@ multiclass SIMDLdSingleDTied<bit R, bits<3> opcode, bits<2> size, string asm,
}
let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
multiclass SIMDStSingleB<bit R, bits<3> opcode, string asm,
RegisterOperand listtype, list<dag> pattern,
RegisterOperand GPR64pi> {
RegisterOperand listtype, RegisterOperand GPR64pi> {
def i8 : SIMDLdStSingleB<0, R, opcode, asm,
(outs), (ins listtype:$Vt, VectorIndexB:$idx,
am_simdnoindex:$vaddr),
pattern>;
am_simdnoindex:$vaddr), []>;
def i8_POST : SIMDLdStSingleBPost<0, R, opcode, asm,
(outs), (ins listtype:$Vt, VectorIndexB:$idx,
@ -7998,12 +7995,10 @@ multiclass SIMDStSingleB<bit R, bits<3> opcode, string asm,
}
let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
multiclass SIMDStSingleH<bit R, bits<3> opcode, bit size, string asm,
RegisterOperand listtype, list<dag> pattern,
RegisterOperand GPR64pi> {
RegisterOperand listtype, RegisterOperand GPR64pi> {
def i16 : SIMDLdStSingleH<0, R, opcode, size, asm,
(outs), (ins listtype:$Vt, VectorIndexH:$idx,
am_simdnoindex:$vaddr),
pattern>;
am_simdnoindex:$vaddr), []>;
def i16_POST : SIMDLdStSingleHPost<0, R, opcode, size, asm,
(outs), (ins listtype:$Vt, VectorIndexH:$idx,
@ -8011,12 +8006,10 @@ multiclass SIMDStSingleH<bit R, bits<3> opcode, bit size, string asm,
}
let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
multiclass SIMDStSingleS<bit R, bits<3> opcode, bits<2> size,string asm,
RegisterOperand listtype, list<dag> pattern,
RegisterOperand GPR64pi> {
RegisterOperand listtype, RegisterOperand GPR64pi> {
def i32 : SIMDLdStSingleS<0, R, opcode, size, asm,
(outs), (ins listtype:$Vt, VectorIndexS:$idx,
am_simdnoindex:$vaddr),
pattern>;
am_simdnoindex:$vaddr), []>;
def i32_POST : SIMDLdStSingleSPost<0, R, opcode, size, asm,
(outs), (ins listtype:$Vt, VectorIndexS:$idx,
@ -8024,11 +8017,10 @@ multiclass SIMDStSingleS<bit R, bits<3> opcode, bits<2> size,string asm,
}
let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
multiclass SIMDStSingleD<bit R, bits<3> opcode, bits<2> size, string asm,
RegisterOperand listtype, list<dag> pattern,
RegisterOperand GPR64pi> {
RegisterOperand listtype, RegisterOperand GPR64pi> {
def i64 : SIMDLdStSingleD<0, R, opcode, size, asm,
(outs), (ins listtype:$Vt, VectorIndexD:$idx,
am_simdnoindex:$vaddr), pattern>;
am_simdnoindex:$vaddr), []>;
def i64_POST : SIMDLdStSingleDPost<0, R, opcode, size, asm,
(outs), (ins listtype:$Vt, VectorIndexD:$idx,

View File

@ -4087,18 +4087,32 @@ def : Pat<(v2f64 (ARM64dup (f64 (load am_simdnoindex:$vaddr)))),
def : Pat<(v1f64 (ARM64dup (f64 (load am_simdnoindex:$vaddr)))),
(LD1Rv1d am_simdnoindex:$vaddr)>;
def : Pat<(vector_insert (v16i8 VecListOne128:$Rd),
(i32 (extloadi8 am_simdnoindex:$vaddr)), VectorIndexB:$idx),
(LD1i8 VecListOne128:$Rd, VectorIndexB:$idx, am_simdnoindex:$vaddr)>;
def : Pat<(vector_insert (v8i16 VecListOne128:$Rd),
(i32 (extloadi16 am_simdnoindex:$vaddr)), VectorIndexH:$idx),
(LD1i16 VecListOne128:$Rd, VectorIndexH:$idx, am_simdnoindex:$vaddr)>;
def : Pat<(vector_insert (v4i32 VecListOne128:$Rd),
(i32 (load am_simdnoindex:$vaddr)), VectorIndexS:$idx),
(LD1i32 VecListOne128:$Rd, VectorIndexS:$idx, am_simdnoindex:$vaddr)>;
def : Pat<(vector_insert (v2i64 VecListOne128:$Rd),
(i64 (load am_simdnoindex:$vaddr)), VectorIndexD:$idx),
(LD1i64 VecListOne128:$Rd, VectorIndexD:$idx, am_simdnoindex:$vaddr)>;
class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction LD1>
: Pat<(vector_insert (VTy VecListOne128:$Rd),
(STy (scalar_load am_simdnoindex:$vaddr)), VecIndex:$idx),
(LD1 VecListOne128:$Rd, VecIndex:$idx, am_simdnoindex:$vaddr)>;
def : Ld1Lane128Pat<extloadi8, VectorIndexB, v16i8, i32, LD1i8>;
def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16, i32, LD1i16>;
def : Ld1Lane128Pat<load, VectorIndexS, v4i32, i32, LD1i32>;
def : Ld1Lane128Pat<load, VectorIndexS, v4f32, f32, LD1i32>;
def : Ld1Lane128Pat<load, VectorIndexD, v2i64, i64, LD1i64>;
def : Ld1Lane128Pat<load, VectorIndexD, v2f64, f64, LD1i64>;
class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction LD1>
: Pat<(vector_insert (VTy VecListOne64:$Rd),
(STy (scalar_load am_simdnoindex:$vaddr)), VecIndex:$idx),
(EXTRACT_SUBREG
(LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
VecIndex:$idx, am_simdnoindex:$vaddr),
dsub)>;
def : Ld1Lane64Pat<extloadi8, VectorIndexB, v8i8, i32, LD1i8>;
def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>;
def : Ld1Lane64Pat<load, VectorIndexS, v2i32, i32, LD1i32>;
def : Ld1Lane64Pat<load, VectorIndexS, v2f32, f32, LD1i32>;
defm LD1 : SIMDLdSt1SingleAliases<"ld1">;
@ -4107,38 +4121,53 @@ defm LD3 : SIMDLdSt3SingleAliases<"ld3">;
defm LD4 : SIMDLdSt4SingleAliases<"ld4">;
// Stores
let AddedComplexity = 8 in {
defm ST1 : SIMDStSingleB<0, 0b000, "st1", VecListOneb,
[(truncstorei8
(i32 (vector_extract (v16i8 VecListOneb:$Vt), VectorIndexB:$idx)),
am_simdnoindex:$vaddr)], GPR64pi1>;
defm ST1 : SIMDStSingleH<0, 0b010, 0, "st1", VecListOneh,
[(truncstorei16
(i32 (vector_extract (v8i16 VecListOneh:$Vt), VectorIndexH:$idx)),
am_simdnoindex:$vaddr)], GPR64pi2>;
defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes,
[(store
(i32 (vector_extract (v4i32 VecListOnes:$Vt), VectorIndexS:$idx)),
am_simdnoindex:$vaddr)], GPR64pi4>;
defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned,
[(store
(i64 (vector_extract (v2i64 VecListOned:$Vt), VectorIndexD:$idx)),
am_simdnoindex:$vaddr)], GPR64pi8>;
}
defm ST1 : SIMDStSingleB<0, 0b000, "st1", VecListOneb, GPR64pi1>;
defm ST1 : SIMDStSingleH<0, 0b010, 0, "st1", VecListOneh, GPR64pi2>;
defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>;
defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>;
let AddedComplexity = 8 in
class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction ST1>
: Pat<(scalar_store
(STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
am_simdnoindex:$vaddr),
(ST1 VecListOne128:$Vt, VecIndex:$idx, am_simdnoindex:$vaddr)>;
def : St1Lane128Pat<truncstorei8, VectorIndexB, v16i8, i32, ST1i8>;
def : St1Lane128Pat<truncstorei16, VectorIndexH, v8i16, i32, ST1i16>;
def : St1Lane128Pat<store, VectorIndexS, v4i32, i32, ST1i32>;
def : St1Lane128Pat<store, VectorIndexS, v4f32, f32, ST1i32>;
def : St1Lane128Pat<store, VectorIndexD, v2i64, i64, ST1i64>;
def : St1Lane128Pat<store, VectorIndexD, v2f64, f64, ST1i64>;
let AddedComplexity = 8 in
class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex,
ValueType VTy, ValueType STy, Instruction ST1>
: Pat<(scalar_store
(STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
am_simdnoindex:$vaddr),
(ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
VecIndex:$idx, am_simdnoindex:$vaddr)>;
def : St1Lane64Pat<truncstorei8, VectorIndexB, v8i8, i32, ST1i8>;
def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>;
def : St1Lane64Pat<store, VectorIndexS, v2i32, i32, ST1i32>;
def : St1Lane64Pat<store, VectorIndexS, v2f32, f32, ST1i32>;
let mayStore = 1, neverHasSideEffects = 1 in {
defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, [], GPR64pi2>;
defm ST2 : SIMDStSingleH<1, 0b010, 0, "st2", VecListTwoh, [], GPR64pi4>;
defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos, [], GPR64pi8>;
defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod, [], GPR64pi16>;
defm ST3 : SIMDStSingleB<0, 0b001, "st3", VecListThreeb, [], GPR64pi3>;
defm ST3 : SIMDStSingleH<0, 0b011, 0, "st3", VecListThreeh, [], GPR64pi6>;
defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, [], GPR64pi12>;
defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, [], GPR64pi24>;
defm ST4 : SIMDStSingleB<1, 0b001, "st4", VecListFourb, [], GPR64pi4>;
defm ST4 : SIMDStSingleH<1, 0b011, 0, "st4", VecListFourh, [], GPR64pi8>;
defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours, [], GPR64pi16>;
defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd, [], GPR64pi32>;
defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, GPR64pi2>;
defm ST2 : SIMDStSingleH<1, 0b010, 0, "st2", VecListTwoh, GPR64pi4>;
defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos, GPR64pi8>;
defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod, GPR64pi16>;
defm ST3 : SIMDStSingleB<0, 0b001, "st3", VecListThreeb, GPR64pi3>;
defm ST3 : SIMDStSingleH<0, 0b011, 0, "st3", VecListThreeh, GPR64pi6>;
defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>;
defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>;
defm ST4 : SIMDStSingleB<1, 0b001, "st4", VecListFourb, GPR64pi4>;
defm ST4 : SIMDStSingleH<1, 0b011, 0, "st4", VecListFourh, GPR64pi8>;
defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours, GPR64pi16>;
defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd, GPR64pi32>;
}
defm ST1 : SIMDLdSt1SingleAliases<"st1">;

View File

@ -5,7 +5,7 @@
%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
define %struct.__neon_int8x8x2_t @ld2_8b(i8* %A) nounwind {
; CHECK: ld2_8b
; CHECK-LABEL: ld2_8b
; Make sure we are loading into the results defined by the ABI (i.e., v0, v1)
; and from the argument of the function also defined by ABI (i.e., x0)
; CHECK ld2.8b { v0, v1 }, [x0]
@ -15,7 +15,7 @@ define %struct.__neon_int8x8x2_t @ld2_8b(i8* %A) nounwind {
}
define %struct.__neon_int8x8x3_t @ld3_8b(i8* %A) nounwind {
; CHECK: ld3_8b
; CHECK-LABEL: ld3_8b
; Make sure we are using the operands defined by the ABI
; CHECK ld3.8b { v0, v1, v2 }, [x0]
; CHECK-NEXT ret
@ -24,7 +24,7 @@ define %struct.__neon_int8x8x3_t @ld3_8b(i8* %A) nounwind {
}
define %struct.__neon_int8x8x4_t @ld4_8b(i8* %A) nounwind {
; CHECK: ld4_8b
; CHECK-LABEL: ld4_8b
; Make sure we are using the operands defined by the ABI
; CHECK ld4.8b { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT ret
@ -41,7 +41,7 @@ declare %struct.__neon_int8x8x4_t @llvm.arm64.neon.ld4.v8i8.p0i8(i8*) nounwind r
%struct.__neon_int8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> }
define %struct.__neon_int8x16x2_t @ld2_16b(i8* %A) nounwind {
; CHECK: ld2_16b
; CHECK-LABEL: ld2_16b
; Make sure we are using the operands defined by the ABI
; CHECK ld2.16b { v0, v1 }, [x0]
; CHECK-NEXT ret
@ -50,7 +50,7 @@ define %struct.__neon_int8x16x2_t @ld2_16b(i8* %A) nounwind {
}
define %struct.__neon_int8x16x3_t @ld3_16b(i8* %A) nounwind {
; CHECK: ld3_16b
; CHECK-LABEL: ld3_16b
; Make sure we are using the operands defined by the ABI
; CHECK ld3.16b { v0, v1, v2 }, [x0]
; CHECK-NEXT ret
@ -59,7 +59,7 @@ define %struct.__neon_int8x16x3_t @ld3_16b(i8* %A) nounwind {
}
define %struct.__neon_int8x16x4_t @ld4_16b(i8* %A) nounwind {
; CHECK: ld4_16b
; CHECK-LABEL: ld4_16b
; Make sure we are using the operands defined by the ABI
; CHECK ld4.16b { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT ret
@ -76,7 +76,7 @@ declare %struct.__neon_int8x16x4_t @llvm.arm64.neon.ld4.v16i8.p0i8(i8*) nounwind
%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
define %struct.__neon_int16x4x2_t @ld2_4h(i16* %A) nounwind {
; CHECK: ld2_4h
; CHECK-LABEL: ld2_4h
; Make sure we are using the operands defined by the ABI
; CHECK ld2.4h { v0, v1 }, [x0]
; CHECK-NEXT ret
@ -85,7 +85,7 @@ define %struct.__neon_int16x4x2_t @ld2_4h(i16* %A) nounwind {
}
define %struct.__neon_int16x4x3_t @ld3_4h(i16* %A) nounwind {
; CHECK: ld3_4h
; CHECK-LABEL: ld3_4h
; Make sure we are using the operands defined by the ABI
; CHECK ld3.4h { v0, v1, v2 }, [x0]
; CHECK-NEXT ret
@ -94,7 +94,7 @@ define %struct.__neon_int16x4x3_t @ld3_4h(i16* %A) nounwind {
}
define %struct.__neon_int16x4x4_t @ld4_4h(i16* %A) nounwind {
; CHECK: ld4_4h
; CHECK-LABEL: ld4_4h
; Make sure we are using the operands defined by the ABI
; CHECK ld4.4h { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT ret
@ -111,7 +111,7 @@ declare %struct.__neon_int16x4x4_t @llvm.arm64.neon.ld4.v4i16.p0i16(i16*) nounwi
%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> }
define %struct.__neon_int16x8x2_t @ld2_8h(i16* %A) nounwind {
; CHECK: ld2_8h
; CHECK-LABEL: ld2_8h
; Make sure we are using the operands defined by the ABI
; CHECK ld2.8h { v0, v1 }, [x0]
; CHECK-NEXT ret
@ -120,7 +120,7 @@ define %struct.__neon_int16x8x2_t @ld2_8h(i16* %A) nounwind {
}
define %struct.__neon_int16x8x3_t @ld3_8h(i16* %A) nounwind {
; CHECK: ld3_8h
; CHECK-LABEL: ld3_8h
; Make sure we are using the operands defined by the ABI
; CHECK ld3.8h { v0, v1, v2 }, [x0]
; CHECK-NEXT ret
@ -129,7 +129,7 @@ define %struct.__neon_int16x8x3_t @ld3_8h(i16* %A) nounwind {
}
define %struct.__neon_int16x8x4_t @ld4_8h(i16* %A) nounwind {
; CHECK: ld4_8h
; CHECK-LABEL: ld4_8h
; Make sure we are using the operands defined by the ABI
; CHECK ld4.8h { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT ret
@ -146,7 +146,7 @@ declare %struct.__neon_int16x8x4_t @llvm.arm64.neon.ld4.v8i16.p0i16(i16*) nounwi
%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
define %struct.__neon_int32x2x2_t @ld2_2s(i32* %A) nounwind {
; CHECK: ld2_2s
; CHECK-LABEL: ld2_2s
; Make sure we are using the operands defined by the ABI
; CHECK ld2.2s { v0, v1 }, [x0]
; CHECK-NEXT ret
@ -155,7 +155,7 @@ define %struct.__neon_int32x2x2_t @ld2_2s(i32* %A) nounwind {
}
define %struct.__neon_int32x2x3_t @ld3_2s(i32* %A) nounwind {
; CHECK: ld3_2s
; CHECK-LABEL: ld3_2s
; Make sure we are using the operands defined by the ABI
; CHECK ld3.2s { v0, v1, v2 }, [x0]
; CHECK-NEXT ret
@ -164,7 +164,7 @@ define %struct.__neon_int32x2x3_t @ld3_2s(i32* %A) nounwind {
}
define %struct.__neon_int32x2x4_t @ld4_2s(i32* %A) nounwind {
; CHECK: ld4_2s
; CHECK-LABEL: ld4_2s
; Make sure we are using the operands defined by the ABI
; CHECK ld4.2s { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT ret
@ -181,7 +181,7 @@ declare %struct.__neon_int32x2x4_t @llvm.arm64.neon.ld4.v2i32.p0i32(i32*) nounwi
%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> }
define %struct.__neon_int32x4x2_t @ld2_4s(i32* %A) nounwind {
; CHECK: ld2_4s
; CHECK-LABEL: ld2_4s
; Make sure we are using the operands defined by the ABI
; CHECK ld2.4s { v0, v1 }, [x0]
; CHECK-NEXT ret
@ -190,7 +190,7 @@ define %struct.__neon_int32x4x2_t @ld2_4s(i32* %A) nounwind {
}
define %struct.__neon_int32x4x3_t @ld3_4s(i32* %A) nounwind {
; CHECK: ld3_4s
; CHECK-LABEL: ld3_4s
; Make sure we are using the operands defined by the ABI
; CHECK ld3.4s { v0, v1, v2 }, [x0]
; CHECK-NEXT ret
@ -199,7 +199,7 @@ define %struct.__neon_int32x4x3_t @ld3_4s(i32* %A) nounwind {
}
define %struct.__neon_int32x4x4_t @ld4_4s(i32* %A) nounwind {
; CHECK: ld4_4s
; CHECK-LABEL: ld4_4s
; Make sure we are using the operands defined by the ABI
; CHECK ld4.4s { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT ret
@ -216,7 +216,7 @@ declare %struct.__neon_int32x4x4_t @llvm.arm64.neon.ld4.v4i32.p0i32(i32*) nounwi
%struct.__neon_int64x2x4_t = type { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }
define %struct.__neon_int64x2x2_t @ld2_2d(i64* %A) nounwind {
; CHECK: ld2_2d
; CHECK-LABEL: ld2_2d
; Make sure we are using the operands defined by the ABI
; CHECK ld2.2d { v0, v1 }, [x0]
; CHECK-NEXT ret
@ -225,7 +225,7 @@ define %struct.__neon_int64x2x2_t @ld2_2d(i64* %A) nounwind {
}
define %struct.__neon_int64x2x3_t @ld3_2d(i64* %A) nounwind {
; CHECK: ld3_2d
; CHECK-LABEL: ld3_2d
; Make sure we are using the operands defined by the ABI
; CHECK ld3.2d { v0, v1, v2 }, [x0]
; CHECK-NEXT ret
@ -234,7 +234,7 @@ define %struct.__neon_int64x2x3_t @ld3_2d(i64* %A) nounwind {
}
define %struct.__neon_int64x2x4_t @ld4_2d(i64* %A) nounwind {
; CHECK: ld4_2d
; CHECK-LABEL: ld4_2d
; Make sure we are using the operands defined by the ABI
; CHECK ld4.2d { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT ret
@ -252,7 +252,7 @@ declare %struct.__neon_int64x2x4_t @llvm.arm64.neon.ld4.v2i64.p0i64(i64*) nounwi
define %struct.__neon_int64x1x2_t @ld2_1di64(i64* %A) nounwind {
; CHECK: ld2_1di64
; CHECK-LABEL: ld2_1di64
; Make sure we are using the operands defined by the ABI
; CHECK ld1.1d { v0, v1 }, [x0]
; CHECK-NEXT ret
@ -261,7 +261,7 @@ define %struct.__neon_int64x1x2_t @ld2_1di64(i64* %A) nounwind {
}
define %struct.__neon_int64x1x3_t @ld3_1di64(i64* %A) nounwind {
; CHECK: ld3_1di64
; CHECK-LABEL: ld3_1di64
; Make sure we are using the operands defined by the ABI
; CHECK ld1.1d { v0, v1, v2 }, [x0]
; CHECK-NEXT ret
@ -270,7 +270,7 @@ define %struct.__neon_int64x1x3_t @ld3_1di64(i64* %A) nounwind {
}
define %struct.__neon_int64x1x4_t @ld4_1di64(i64* %A) nounwind {
; CHECK: ld4_1di64
; CHECK-LABEL: ld4_1di64
; Make sure we are using the operands defined by the ABI
; CHECK ld1.1d { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT ret
@ -289,7 +289,7 @@ declare %struct.__neon_int64x1x4_t @llvm.arm64.neon.ld4.v1i64.p0i64(i64*) nounwi
define %struct.__neon_float64x1x2_t @ld2_1df64(double* %A) nounwind {
; CHECK: ld2_1df64
; CHECK-LABEL: ld2_1df64
; Make sure we are using the operands defined by the ABI
; CHECK ld1.1d { v0, v1 }, [x0]
; CHECK-NEXT ret
@ -298,7 +298,7 @@ define %struct.__neon_float64x1x2_t @ld2_1df64(double* %A) nounwind {
}
define %struct.__neon_float64x1x3_t @ld3_1df64(double* %A) nounwind {
; CHECK: ld3_1df64
; CHECK-LABEL: ld3_1df64
; Make sure we are using the operands defined by the ABI
; CHECK ld1.1d { v0, v1, v2 }, [x0]
; CHECK-NEXT ret
@ -307,7 +307,7 @@ define %struct.__neon_float64x1x3_t @ld3_1df64(double* %A) nounwind {
}
define %struct.__neon_float64x1x4_t @ld4_1df64(double* %A) nounwind {
; CHECK: ld4_1df64
; CHECK-LABEL: ld4_1df64
; Make sure we are using the operands defined by the ABI
; CHECK ld1.1d { v0, v1, v2, v3 }, [x0]
; CHECK-NEXT ret
@ -800,7 +800,7 @@ declare %struct.__neon_int64x2x3_t @llvm.arm64.neon.ld3r.v2i64.p0i64(i64*) nounw
declare %struct.__neon_int64x2x4_t @llvm.arm64.neon.ld4r.v2i64.p0i64(i64*) nounwind readonly
define <16 x i8> @ld1_16b(<16 x i8> %V, i8* %bar) {
; CHECK: ld1_16b
; CHECK-LABEL: ld1_16b
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.b { v0 }[0], [x0]
; CHECK-NEXT ret
@ -810,7 +810,7 @@ define <16 x i8> @ld1_16b(<16 x i8> %V, i8* %bar) {
}
define <8 x i16> @ld1_8h(<8 x i16> %V, i16* %bar) {
; CHECK: ld1_8h
; CHECK-LABEL: ld1_8h
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.h { v0 }[0], [x0]
; CHECK-NEXT ret
@ -820,7 +820,7 @@ define <8 x i16> @ld1_8h(<8 x i16> %V, i16* %bar) {
}
define <4 x i32> @ld1_4s(<4 x i32> %V, i32* %bar) {
; CHECK: ld1_4s
; CHECK-LABEL: ld1_4s
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.s { v0 }[0], [x0]
; CHECK-NEXT ret
@ -829,8 +829,18 @@ define <4 x i32> @ld1_4s(<4 x i32> %V, i32* %bar) {
ret <4 x i32> %tmp2
}
define <4 x float> @ld1_4s_float(<4 x float> %V, float* %bar) {
; CHECK-LABEL: ld1_4s_float:
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.s { v0 }[0], [x0]
; CHECK-NEXT ret
%tmp1 = load float* %bar
%tmp2 = insertelement <4 x float> %V, float %tmp1, i32 0
ret <4 x float> %tmp2
}
define <2 x i64> @ld1_2d(<2 x i64> %V, i64* %bar) {
; CHECK: ld1_2d
; CHECK-LABEL: ld1_2d
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.d { v0 }[0], [x0]
; CHECK-NEXT ret
@ -839,8 +849,18 @@ define <2 x i64> @ld1_2d(<2 x i64> %V, i64* %bar) {
ret <2 x i64> %tmp2
}
define <2 x double> @ld1_2d_double(<2 x double> %V, double* %bar) {
; CHECK-LABEL: ld1_2d_double:
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.d { v0 }[0], [x0]
; CHECK-NEXT ret
%tmp1 = load double* %bar
%tmp2 = insertelement <2 x double> %V, double %tmp1, i32 0
ret <2 x double> %tmp2
}
define <1 x i64> @ld1_1d(<1 x i64>* %p) {
; CHECK: ld1_1d
; CHECK-LABEL: ld1_1d
; Make sure we are using the operands defined by the ABI
; CHECK: ldr [[REG:d[0-9]+]], [x0]
; CHECK-NEXT: ret
@ -848,6 +868,46 @@ define <1 x i64> @ld1_1d(<1 x i64>* %p) {
ret <1 x i64> %tmp
}
define <8 x i8> @ld1_8b(<8 x i8> %V, i8* %bar) {
; CHECK-LABEL: ld1_8b
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.b { v0 }[0], [x0]
; CHECK-NEXT ret
%tmp1 = load i8* %bar
%tmp2 = insertelement <8 x i8> %V, i8 %tmp1, i32 0
ret <8 x i8> %tmp2
}
define <4 x i16> @ld1_4h(<4 x i16> %V, i16* %bar) {
; CHECK-LABEL: ld1_4h
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.h { v0 }[0], [x0]
; CHECK-NEXT ret
%tmp1 = load i16* %bar
%tmp2 = insertelement <4 x i16> %V, i16 %tmp1, i32 0
ret <4 x i16> %tmp2
}
define <2 x i32> @ld1_2s(<2 x i32> %V, i32* %bar) {
; CHECK-LABEL: ld1_2s:
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.s { v0 }[0], [x0]
; CHECK-NEXT ret
%tmp1 = load i32* %bar
%tmp2 = insertelement <2 x i32> %V, i32 %tmp1, i32 0
ret <2 x i32> %tmp2
}
define <2 x float> @ld1_2s_float(<2 x float> %V, float* %bar) {
; CHECK-LABEL: ld1_2s_float:
; Make sure we are using the operands defined by the ABI
; CHECK: ld1.s { v0 }[0], [x0]
; CHECK-NEXT ret
%tmp1 = load float* %bar
%tmp2 = insertelement <2 x float> %V, float %tmp1, i32 0
ret <2 x float> %tmp2
}
; Add rdar://13098923 test case: vld1_dup_u32 doesn't generate ld1r.2s
define void @ld1r_2s_from_dup(i8* nocapture %a, i8* nocapture %b, i16* nocapture %diff) nounwind ssp {
@ -882,7 +942,7 @@ entry:
; Tests for rdar://11947069: vld1_dup_* and vld1q_dup_* code gen is suboptimal
define <4 x float> @ld1r_4s_float(float* nocapture %x) {
entry:
; CHECK: ld1r_4s_float
; CHECK-LABEL: ld1r_4s_float
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.4s { v0 }, [x0]
; CHECK-NEXT ret
@ -896,7 +956,7 @@ entry:
define <2 x float> @ld1r_2s_float(float* nocapture %x) {
entry:
; CHECK: ld1r_2s_float
; CHECK-LABEL: ld1r_2s_float
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2s { v0 }, [x0]
; CHECK-NEXT ret
@ -908,7 +968,7 @@ entry:
define <2 x double> @ld1r_2d_double(double* nocapture %x) {
entry:
; CHECK: ld1r_2d_double
; CHECK-LABEL: ld1r_2d_double
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2d { v0 }, [x0]
; CHECK-NEXT ret
@ -920,7 +980,7 @@ entry:
define <1 x double> @ld1r_1d_double(double* nocapture %x) {
entry:
; CHECK: ld1r_1d_double
; CHECK-LABEL: ld1r_1d_double
; Make sure we are using the operands defined by the ABI
; CHECK: ldr d0, [x0]
; CHECK-NEXT ret
@ -931,7 +991,7 @@ entry:
define <4 x float> @ld1r_4s_float_shuff(float* nocapture %x) {
entry:
; CHECK: ld1r_4s_float_shuff
; CHECK-LABEL: ld1r_4s_float_shuff
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.4s { v0 }, [x0]
; CHECK-NEXT ret
@ -943,7 +1003,7 @@ entry:
define <2 x float> @ld1r_2s_float_shuff(float* nocapture %x) {
entry:
; CHECK: ld1r_2s_float_shuff
; CHECK-LABEL: ld1r_2s_float_shuff
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2s { v0 }, [x0]
; CHECK-NEXT ret
@ -955,7 +1015,7 @@ entry:
define <2 x double> @ld1r_2d_double_shuff(double* nocapture %x) {
entry:
; CHECK: ld1r_2d_double_shuff
; CHECK-LABEL: ld1r_2d_double_shuff
; Make sure we are using the operands defined by the ABI
; CHECK: ld1r.2d { v0 }, [x0]
; CHECK-NEXT ret
@ -967,7 +1027,7 @@ entry:
define <1 x double> @ld1r_1d_double_shuff(double* nocapture %x) {
entry:
; CHECK: ld1r_1d_double_shuff
; CHECK-LABEL: ld1r_1d_double_shuff
; Make sure we are using the operands defined by the ABI
; CHECK: ldr d0, [x0]
; CHECK-NEXT ret

View File

@ -1,7 +1,7 @@
; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
define void @st1lane_16b(<16 x i8> %A, i8* %D) {
; CHECK: st1lane_16b
; CHECK-LABEL: st1lane_16b
; CHECK: st1.b
%tmp = extractelement <16 x i8> %A, i32 1
store i8 %tmp, i8* %D
@ -9,7 +9,7 @@ define void @st1lane_16b(<16 x i8> %A, i8* %D) {
}
define void @st1lane_8h(<8 x i16> %A, i16* %D) {
; CHECK: st1lane_8h
; CHECK-LABEL: st1lane_8h
; CHECK: st1.h
%tmp = extractelement <8 x i16> %A, i32 1
store i16 %tmp, i16* %D
@ -17,44 +17,92 @@ define void @st1lane_8h(<8 x i16> %A, i16* %D) {
}
define void @st1lane_4s(<4 x i32> %A, i32* %D) {
; CHECK: st1lane_4s
; CHECK-LABEL: st1lane_4s
; CHECK: st1.s
%tmp = extractelement <4 x i32> %A, i32 1
store i32 %tmp, i32* %D
ret void
}
define void @st1lane_4s_float(<4 x float> %A, float* %D) {
; CHECK-LABEL: st1lane_4s_float
; CHECK: st1.s
%tmp = extractelement <4 x float> %A, i32 1
store float %tmp, float* %D
ret void
}
define void @st1lane_2d(<2 x i64> %A, i64* %D) {
; CHECK: st1lane_2d
; CHECK-LABEL: st1lane_2d
; CHECK: st1.d
%tmp = extractelement <2 x i64> %A, i32 1
store i64 %tmp, i64* %D
ret void
}
define void @st1lane_2d_double(<2 x double> %A, double* %D) {
; CHECK-LABEL: st1lane_2d_double
; CHECK: st1.d
%tmp = extractelement <2 x double> %A, i32 1
store double %tmp, double* %D
ret void
}
define void @st1lane_8b(<8 x i8> %A, i8* %D) {
; CHECK-LABEL: st1lane_8b
; CHECK: st1.b
%tmp = extractelement <8 x i8> %A, i32 1
store i8 %tmp, i8* %D
ret void
}
define void @st1lane_4h(<4 x i16> %A, i16* %D) {
; CHECK-LABEL: st1lane_4h
; CHECK: st1.h
%tmp = extractelement <4 x i16> %A, i32 1
store i16 %tmp, i16* %D
ret void
}
define void @st1lane_2s(<2 x i32> %A, i32* %D) {
; CHECK-LABEL: st1lane_2s
; CHECK: st1.s
%tmp = extractelement <2 x i32> %A, i32 1
store i32 %tmp, i32* %D
ret void
}
define void @st1lane_2s_float(<2 x float> %A, float* %D) {
; CHECK-LABEL: st1lane_2s_float
; CHECK: st1.s
%tmp = extractelement <2 x float> %A, i32 1
store float %tmp, float* %D
ret void
}
define void @st2lane_16b(<16 x i8> %A, <16 x i8> %B, i8* %D) {
; CHECK: st2lane_16b
; CHECK-LABEL: st2lane_16b
; CHECK: st2.b
call void @llvm.arm64.neon.st2lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i64 1, i8* %D)
ret void
}
define void @st2lane_8h(<8 x i16> %A, <8 x i16> %B, i16* %D) {
; CHECK: st2lane_8h
; CHECK-LABEL: st2lane_8h
; CHECK: st2.h
call void @llvm.arm64.neon.st2lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i64 1, i16* %D)
ret void
}
define void @st2lane_4s(<4 x i32> %A, <4 x i32> %B, i32* %D) {
; CHECK: st2lane_4s
; CHECK-LABEL: st2lane_4s
; CHECK: st2.s
call void @llvm.arm64.neon.st2lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i64 1, i32* %D)
ret void
}
define void @st2lane_2d(<2 x i64> %A, <2 x i64> %B, i64* %D) {
; CHECK: st2lane_2d
; CHECK-LABEL: st2lane_2d
; CHECK: st2.d
call void @llvm.arm64.neon.st2lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64 1, i64* %D)
ret void
@ -66,28 +114,28 @@ declare void @llvm.arm64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32
declare void @llvm.arm64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readnone
define void @st3lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %D) {
; CHECK: st3lane_16b
; CHECK-LABEL: st3lane_16b
; CHECK: st3.b
call void @llvm.arm64.neon.st3lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i64 1, i8* %D)
ret void
}
define void @st3lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %D) {
; CHECK: st3lane_8h
; CHECK-LABEL: st3lane_8h
; CHECK: st3.h
call void @llvm.arm64.neon.st3lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i64 1, i16* %D)
ret void
}
define void @st3lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %D) {
; CHECK: st3lane_4s
; CHECK-LABEL: st3lane_4s
; CHECK: st3.s
call void @llvm.arm64.neon.st3lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i64 1, i32* %D)
ret void
}
define void @st3lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %D) {
; CHECK: st3lane_2d
; CHECK-LABEL: st3lane_2d
; CHECK: st3.d
call void @llvm.arm64.neon.st3lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64 1, i64* %D)
ret void
@ -99,28 +147,28 @@ declare void @llvm.arm64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32
declare void @llvm.arm64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readnone
define void @st4lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %E) {
; CHECK: st4lane_16b
; CHECK-LABEL: st4lane_16b
; CHECK: st4.b
call void @llvm.arm64.neon.st4lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 1, i8* %E)
ret void
}
define void @st4lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %E) {
; CHECK: st4lane_8h
; CHECK-LABEL: st4lane_8h
; CHECK: st4.h
call void @llvm.arm64.neon.st4lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 1, i16* %E)
ret void
}
define void @st4lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %E) {
; CHECK: st4lane_4s
; CHECK-LABEL: st4lane_4s
; CHECK: st4.s
call void @llvm.arm64.neon.st4lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 1, i32* %E)
ret void
}
define void @st4lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %E) {
; CHECK: st4lane_2d
; CHECK-LABEL: st4lane_2d
; CHECK: st4.d
call void @llvm.arm64.neon.st4lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 1, i64* %E)
ret void
@ -133,21 +181,21 @@ declare void @llvm.arm64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64
define void @st2_8b(<8 x i8> %A, <8 x i8> %B, i8* %P) nounwind {
; CHECK: st2_8b
; CHECK-LABEL: st2_8b
; CHECK st2.8b
call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %P)
ret void
}
define void @st3_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P) nounwind {
; CHECK: st3_8b
; CHECK-LABEL: st3_8b
; CHECK st3.8b
call void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P)
ret void
}
define void @st4_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P) nounwind {
; CHECK: st4_8b
; CHECK-LABEL: st4_8b
; CHECK st4.8b
call void @llvm.arm64.neon.st4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P)
ret void
@ -158,21 +206,21 @@ declare void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) n
declare void @llvm.arm64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly
define void @st2_16b(<16 x i8> %A, <16 x i8> %B, i8* %P) nounwind {
; CHECK: st2_16b
; CHECK-LABEL: st2_16b
; CHECK st2.16b
call void @llvm.arm64.neon.st2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %P)
ret void
}
define void @st3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P) nounwind {
; CHECK: st3_16b
; CHECK-LABEL: st3_16b
; CHECK st3.16b
call void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P)
ret void
}
define void @st4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P) nounwind {
; CHECK: st4_16b
; CHECK-LABEL: st4_16b
; CHECK st4.16b
call void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P)
ret void
@ -183,21 +231,21 @@ declare void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8
declare void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly
define void @st2_4h(<4 x i16> %A, <4 x i16> %B, i16* %P) nounwind {
; CHECK: st2_4h
; CHECK-LABEL: st2_4h
; CHECK st2.4h
call void @llvm.arm64.neon.st2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %P)
ret void
}
define void @st3_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P) nounwind {
; CHECK: st3_4h
; CHECK-LABEL: st3_4h
; CHECK st3.4h
call void @llvm.arm64.neon.st3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P)
ret void
}
define void @st4_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P) nounwind {
; CHECK: st4_4h
; CHECK-LABEL: st4_4h
; CHECK st4.4h
call void @llvm.arm64.neon.st4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P)
ret void
@ -208,21 +256,21 @@ declare void @llvm.arm64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i
declare void @llvm.arm64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly
define void @st2_8h(<8 x i16> %A, <8 x i16> %B, i16* %P) nounwind {
; CHECK: st2_8h
; CHECK-LABEL: st2_8h
; CHECK st2.8h
call void @llvm.arm64.neon.st2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %P)
ret void
}
define void @st3_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P) nounwind {
; CHECK: st3_8h
; CHECK-LABEL: st3_8h
; CHECK st3.8h
call void @llvm.arm64.neon.st3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P)
ret void
}
define void @st4_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P) nounwind {
; CHECK: st4_8h
; CHECK-LABEL: st4_8h
; CHECK st4.8h
call void @llvm.arm64.neon.st4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P)
ret void
@ -233,21 +281,21 @@ declare void @llvm.arm64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i
declare void @llvm.arm64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly
define void @st2_2s(<2 x i32> %A, <2 x i32> %B, i32* %P) nounwind {
; CHECK: st2_2s
; CHECK-LABEL: st2_2s
; CHECK st2.2s
call void @llvm.arm64.neon.st2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %P)
ret void
}
define void @st3_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P) nounwind {
; CHECK: st3_2s
; CHECK-LABEL: st3_2s
; CHECK st3.2s
call void @llvm.arm64.neon.st3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P)
ret void
}
define void @st4_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P) nounwind {
; CHECK: st4_2s
; CHECK-LABEL: st4_2s
; CHECK st4.2s
call void @llvm.arm64.neon.st4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P)
ret void
@ -258,21 +306,21 @@ declare void @llvm.arm64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i
declare void @llvm.arm64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly
define void @st2_4s(<4 x i32> %A, <4 x i32> %B, i32* %P) nounwind {
; CHECK: st2_4s
; CHECK-LABEL: st2_4s
; CHECK st2.4s
call void @llvm.arm64.neon.st2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %P)
ret void
}
define void @st3_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P) nounwind {
; CHECK: st3_4s
; CHECK-LABEL: st3_4s
; CHECK st3.4s
call void @llvm.arm64.neon.st3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P)
ret void
}
define void @st4_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P) nounwind {
; CHECK: st4_4s
; CHECK-LABEL: st4_4s
; CHECK st4.4s
call void @llvm.arm64.neon.st4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P)
ret void
@ -283,21 +331,21 @@ declare void @llvm.arm64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i
declare void @llvm.arm64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly
define void @st2_1d(<1 x i64> %A, <1 x i64> %B, i64* %P) nounwind {
; CHECK: st2_1d
; CHECK-LABEL: st2_1d
; CHECK st1.2d
call void @llvm.arm64.neon.st2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %P)
ret void
}
define void @st3_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P) nounwind {
; CHECK: st3_1d
; CHECK-LABEL: st3_1d
; CHECK st1.3d
call void @llvm.arm64.neon.st3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P)
ret void
}
define void @st4_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P) nounwind {
; CHECK: st4_1d
; CHECK-LABEL: st4_1d
; CHECK st1.4d
call void @llvm.arm64.neon.st4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P)
ret void
@ -308,21 +356,21 @@ declare void @llvm.arm64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i
declare void @llvm.arm64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly
define void @st2_2d(<2 x i64> %A, <2 x i64> %B, i64* %P) nounwind {
; CHECK: st2_2d
; CHECK-LABEL: st2_2d
; CHECK st2.2d
call void @llvm.arm64.neon.st2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %P)
ret void
}
define void @st3_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P) nounwind {
; CHECK: st3_2d
; CHECK-LABEL: st3_2d
; CHECK st2.3d
call void @llvm.arm64.neon.st3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P)
ret void
}
define void @st4_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P) nounwind {
; CHECK: st4_2d
; CHECK-LABEL: st4_2d
; CHECK st2.4d
call void @llvm.arm64.neon.st4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P)
ret void