diff --git a/llvm/lib/Target/ARM64/ARM64InstrFormats.td b/llvm/lib/Target/ARM64/ARM64InstrFormats.td index 440bf4f3a190..cf8c5037f6ba 100644 --- a/llvm/lib/Target/ARM64/ARM64InstrFormats.td +++ b/llvm/lib/Target/ARM64/ARM64InstrFormats.td @@ -7971,8 +7971,7 @@ multiclass SIMDLdSingleSTied opcode, bits<2> size,string asm, } let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in multiclass SIMDLdSingleDTied opcode, bits<2> size, string asm, - RegisterOperand listtype, - RegisterOperand GPR64pi> { + RegisterOperand listtype, RegisterOperand GPR64pi> { def i64 : SIMDLdStSingleDTied<1, R, opcode, size, asm, (outs listtype:$dst), (ins listtype:$Vt, VectorIndexD:$idx, @@ -7985,12 +7984,10 @@ multiclass SIMDLdSingleDTied opcode, bits<2> size, string asm, } let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in multiclass SIMDStSingleB opcode, string asm, - RegisterOperand listtype, list pattern, - RegisterOperand GPR64pi> { + RegisterOperand listtype, RegisterOperand GPR64pi> { def i8 : SIMDLdStSingleB<0, R, opcode, asm, (outs), (ins listtype:$Vt, VectorIndexB:$idx, - am_simdnoindex:$vaddr), - pattern>; + am_simdnoindex:$vaddr), []>; def i8_POST : SIMDLdStSingleBPost<0, R, opcode, asm, (outs), (ins listtype:$Vt, VectorIndexB:$idx, @@ -7998,12 +7995,10 @@ multiclass SIMDStSingleB opcode, string asm, } let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in multiclass SIMDStSingleH opcode, bit size, string asm, - RegisterOperand listtype, list pattern, - RegisterOperand GPR64pi> { + RegisterOperand listtype, RegisterOperand GPR64pi> { def i16 : SIMDLdStSingleH<0, R, opcode, size, asm, (outs), (ins listtype:$Vt, VectorIndexH:$idx, - am_simdnoindex:$vaddr), - pattern>; + am_simdnoindex:$vaddr), []>; def i16_POST : SIMDLdStSingleHPost<0, R, opcode, size, asm, (outs), (ins listtype:$Vt, VectorIndexH:$idx, @@ -8011,12 +8006,10 @@ multiclass SIMDStSingleH opcode, bit size, string asm, } let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in multiclass SIMDStSingleS opcode, bits<2> size,string asm, - RegisterOperand listtype, list pattern, - RegisterOperand GPR64pi> { + RegisterOperand listtype, RegisterOperand GPR64pi> { def i32 : SIMDLdStSingleS<0, R, opcode, size, asm, (outs), (ins listtype:$Vt, VectorIndexS:$idx, - am_simdnoindex:$vaddr), - pattern>; + am_simdnoindex:$vaddr), []>; def i32_POST : SIMDLdStSingleSPost<0, R, opcode, size, asm, (outs), (ins listtype:$Vt, VectorIndexS:$idx, @@ -8024,11 +8017,10 @@ multiclass SIMDStSingleS opcode, bits<2> size,string asm, } let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in multiclass SIMDStSingleD opcode, bits<2> size, string asm, - RegisterOperand listtype, list pattern, - RegisterOperand GPR64pi> { + RegisterOperand listtype, RegisterOperand GPR64pi> { def i64 : SIMDLdStSingleD<0, R, opcode, size, asm, (outs), (ins listtype:$Vt, VectorIndexD:$idx, - am_simdnoindex:$vaddr), pattern>; + am_simdnoindex:$vaddr), []>; def i64_POST : SIMDLdStSingleDPost<0, R, opcode, size, asm, (outs), (ins listtype:$Vt, VectorIndexD:$idx, diff --git a/llvm/lib/Target/ARM64/ARM64InstrInfo.td b/llvm/lib/Target/ARM64/ARM64InstrInfo.td index 9fc4e7a2ea4c..c9a714b8dda6 100644 --- a/llvm/lib/Target/ARM64/ARM64InstrInfo.td +++ b/llvm/lib/Target/ARM64/ARM64InstrInfo.td @@ -4087,18 +4087,32 @@ def : Pat<(v2f64 (ARM64dup (f64 (load am_simdnoindex:$vaddr)))), def : Pat<(v1f64 (ARM64dup (f64 (load am_simdnoindex:$vaddr)))), (LD1Rv1d am_simdnoindex:$vaddr)>; -def : Pat<(vector_insert (v16i8 VecListOne128:$Rd), - (i32 (extloadi8 am_simdnoindex:$vaddr)), VectorIndexB:$idx), - (LD1i8 VecListOne128:$Rd, VectorIndexB:$idx, am_simdnoindex:$vaddr)>; -def : Pat<(vector_insert (v8i16 VecListOne128:$Rd), - (i32 (extloadi16 am_simdnoindex:$vaddr)), VectorIndexH:$idx), - (LD1i16 VecListOne128:$Rd, VectorIndexH:$idx, am_simdnoindex:$vaddr)>; -def : Pat<(vector_insert (v4i32 VecListOne128:$Rd), - (i32 (load am_simdnoindex:$vaddr)), VectorIndexS:$idx), - (LD1i32 VecListOne128:$Rd, VectorIndexS:$idx, am_simdnoindex:$vaddr)>; -def : Pat<(vector_insert (v2i64 VecListOne128:$Rd), - (i64 (load am_simdnoindex:$vaddr)), VectorIndexD:$idx), - (LD1i64 VecListOne128:$Rd, VectorIndexD:$idx, am_simdnoindex:$vaddr)>; +class Ld1Lane128Pat + : Pat<(vector_insert (VTy VecListOne128:$Rd), + (STy (scalar_load am_simdnoindex:$vaddr)), VecIndex:$idx), + (LD1 VecListOne128:$Rd, VecIndex:$idx, am_simdnoindex:$vaddr)>; + +def : Ld1Lane128Pat; +def : Ld1Lane128Pat; +def : Ld1Lane128Pat; +def : Ld1Lane128Pat; +def : Ld1Lane128Pat; +def : Ld1Lane128Pat; + +class Ld1Lane64Pat + : Pat<(vector_insert (VTy VecListOne64:$Rd), + (STy (scalar_load am_simdnoindex:$vaddr)), VecIndex:$idx), + (EXTRACT_SUBREG + (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub), + VecIndex:$idx, am_simdnoindex:$vaddr), + dsub)>; + +def : Ld1Lane64Pat; +def : Ld1Lane64Pat; +def : Ld1Lane64Pat; +def : Ld1Lane64Pat; defm LD1 : SIMDLdSt1SingleAliases<"ld1">; @@ -4107,38 +4121,53 @@ defm LD3 : SIMDLdSt3SingleAliases<"ld3">; defm LD4 : SIMDLdSt4SingleAliases<"ld4">; // Stores -let AddedComplexity = 8 in { -defm ST1 : SIMDStSingleB<0, 0b000, "st1", VecListOneb, - [(truncstorei8 - (i32 (vector_extract (v16i8 VecListOneb:$Vt), VectorIndexB:$idx)), - am_simdnoindex:$vaddr)], GPR64pi1>; -defm ST1 : SIMDStSingleH<0, 0b010, 0, "st1", VecListOneh, - [(truncstorei16 - (i32 (vector_extract (v8i16 VecListOneh:$Vt), VectorIndexH:$idx)), - am_simdnoindex:$vaddr)], GPR64pi2>; -defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, - [(store - (i32 (vector_extract (v4i32 VecListOnes:$Vt), VectorIndexS:$idx)), - am_simdnoindex:$vaddr)], GPR64pi4>; -defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, - [(store - (i64 (vector_extract (v2i64 VecListOned:$Vt), VectorIndexD:$idx)), - am_simdnoindex:$vaddr)], GPR64pi8>; -} +defm ST1 : SIMDStSingleB<0, 0b000, "st1", VecListOneb, GPR64pi1>; +defm ST1 : SIMDStSingleH<0, 0b010, 0, "st1", VecListOneh, GPR64pi2>; +defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>; +defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>; + +let AddedComplexity = 8 in +class St1Lane128Pat + : Pat<(scalar_store + (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)), + am_simdnoindex:$vaddr), + (ST1 VecListOne128:$Vt, VecIndex:$idx, am_simdnoindex:$vaddr)>; + +def : St1Lane128Pat; +def : St1Lane128Pat; +def : St1Lane128Pat; +def : St1Lane128Pat; +def : St1Lane128Pat; +def : St1Lane128Pat; + +let AddedComplexity = 8 in +class St1Lane64Pat + : Pat<(scalar_store + (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)), + am_simdnoindex:$vaddr), + (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub), + VecIndex:$idx, am_simdnoindex:$vaddr)>; + +def : St1Lane64Pat; +def : St1Lane64Pat; +def : St1Lane64Pat; +def : St1Lane64Pat; let mayStore = 1, neverHasSideEffects = 1 in { -defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, [], GPR64pi2>; -defm ST2 : SIMDStSingleH<1, 0b010, 0, "st2", VecListTwoh, [], GPR64pi4>; -defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos, [], GPR64pi8>; -defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod, [], GPR64pi16>; -defm ST3 : SIMDStSingleB<0, 0b001, "st3", VecListThreeb, [], GPR64pi3>; -defm ST3 : SIMDStSingleH<0, 0b011, 0, "st3", VecListThreeh, [], GPR64pi6>; -defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, [], GPR64pi12>; -defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, [], GPR64pi24>; -defm ST4 : SIMDStSingleB<1, 0b001, "st4", VecListFourb, [], GPR64pi4>; -defm ST4 : SIMDStSingleH<1, 0b011, 0, "st4", VecListFourh, [], GPR64pi8>; -defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours, [], GPR64pi16>; -defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd, [], GPR64pi32>; +defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, GPR64pi2>; +defm ST2 : SIMDStSingleH<1, 0b010, 0, "st2", VecListTwoh, GPR64pi4>; +defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos, GPR64pi8>; +defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod, GPR64pi16>; +defm ST3 : SIMDStSingleB<0, 0b001, "st3", VecListThreeb, GPR64pi3>; +defm ST3 : SIMDStSingleH<0, 0b011, 0, "st3", VecListThreeh, GPR64pi6>; +defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>; +defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>; +defm ST4 : SIMDStSingleB<1, 0b001, "st4", VecListFourb, GPR64pi4>; +defm ST4 : SIMDStSingleH<1, 0b011, 0, "st4", VecListFourh, GPR64pi8>; +defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours, GPR64pi16>; +defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd, GPR64pi32>; } defm ST1 : SIMDLdSt1SingleAliases<"st1">; diff --git a/llvm/test/CodeGen/ARM64/ld1.ll b/llvm/test/CodeGen/ARM64/ld1.ll index f2fd55ce2c78..61836a10a806 100644 --- a/llvm/test/CodeGen/ARM64/ld1.ll +++ b/llvm/test/CodeGen/ARM64/ld1.ll @@ -5,7 +5,7 @@ %struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } define %struct.__neon_int8x8x2_t @ld2_8b(i8* %A) nounwind { -; CHECK: ld2_8b +; CHECK-LABEL: ld2_8b ; Make sure we are loading into the results defined by the ABI (i.e., v0, v1) ; and from the argument of the function also defined by ABI (i.e., x0) ; CHECK ld2.8b { v0, v1 }, [x0] @@ -15,7 +15,7 @@ define %struct.__neon_int8x8x2_t @ld2_8b(i8* %A) nounwind { } define %struct.__neon_int8x8x3_t @ld3_8b(i8* %A) nounwind { -; CHECK: ld3_8b +; CHECK-LABEL: ld3_8b ; Make sure we are using the operands defined by the ABI ; CHECK ld3.8b { v0, v1, v2 }, [x0] ; CHECK-NEXT ret @@ -24,7 +24,7 @@ define %struct.__neon_int8x8x3_t @ld3_8b(i8* %A) nounwind { } define %struct.__neon_int8x8x4_t @ld4_8b(i8* %A) nounwind { -; CHECK: ld4_8b +; CHECK-LABEL: ld4_8b ; Make sure we are using the operands defined by the ABI ; CHECK ld4.8b { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret @@ -41,7 +41,7 @@ declare %struct.__neon_int8x8x4_t @llvm.arm64.neon.ld4.v8i8.p0i8(i8*) nounwind r %struct.__neon_int8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } define %struct.__neon_int8x16x2_t @ld2_16b(i8* %A) nounwind { -; CHECK: ld2_16b +; CHECK-LABEL: ld2_16b ; Make sure we are using the operands defined by the ABI ; CHECK ld2.16b { v0, v1 }, [x0] ; CHECK-NEXT ret @@ -50,7 +50,7 @@ define %struct.__neon_int8x16x2_t @ld2_16b(i8* %A) nounwind { } define %struct.__neon_int8x16x3_t @ld3_16b(i8* %A) nounwind { -; CHECK: ld3_16b +; CHECK-LABEL: ld3_16b ; Make sure we are using the operands defined by the ABI ; CHECK ld3.16b { v0, v1, v2 }, [x0] ; CHECK-NEXT ret @@ -59,7 +59,7 @@ define %struct.__neon_int8x16x3_t @ld3_16b(i8* %A) nounwind { } define %struct.__neon_int8x16x4_t @ld4_16b(i8* %A) nounwind { -; CHECK: ld4_16b +; CHECK-LABEL: ld4_16b ; Make sure we are using the operands defined by the ABI ; CHECK ld4.16b { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret @@ -76,7 +76,7 @@ declare %struct.__neon_int8x16x4_t @llvm.arm64.neon.ld4.v16i8.p0i8(i8*) nounwind %struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } define %struct.__neon_int16x4x2_t @ld2_4h(i16* %A) nounwind { -; CHECK: ld2_4h +; CHECK-LABEL: ld2_4h ; Make sure we are using the operands defined by the ABI ; CHECK ld2.4h { v0, v1 }, [x0] ; CHECK-NEXT ret @@ -85,7 +85,7 @@ define %struct.__neon_int16x4x2_t @ld2_4h(i16* %A) nounwind { } define %struct.__neon_int16x4x3_t @ld3_4h(i16* %A) nounwind { -; CHECK: ld3_4h +; CHECK-LABEL: ld3_4h ; Make sure we are using the operands defined by the ABI ; CHECK ld3.4h { v0, v1, v2 }, [x0] ; CHECK-NEXT ret @@ -94,7 +94,7 @@ define %struct.__neon_int16x4x3_t @ld3_4h(i16* %A) nounwind { } define %struct.__neon_int16x4x4_t @ld4_4h(i16* %A) nounwind { -; CHECK: ld4_4h +; CHECK-LABEL: ld4_4h ; Make sure we are using the operands defined by the ABI ; CHECK ld4.4h { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret @@ -111,7 +111,7 @@ declare %struct.__neon_int16x4x4_t @llvm.arm64.neon.ld4.v4i16.p0i16(i16*) nounwi %struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } define %struct.__neon_int16x8x2_t @ld2_8h(i16* %A) nounwind { -; CHECK: ld2_8h +; CHECK-LABEL: ld2_8h ; Make sure we are using the operands defined by the ABI ; CHECK ld2.8h { v0, v1 }, [x0] ; CHECK-NEXT ret @@ -120,7 +120,7 @@ define %struct.__neon_int16x8x2_t @ld2_8h(i16* %A) nounwind { } define %struct.__neon_int16x8x3_t @ld3_8h(i16* %A) nounwind { -; CHECK: ld3_8h +; CHECK-LABEL: ld3_8h ; Make sure we are using the operands defined by the ABI ; CHECK ld3.8h { v0, v1, v2 }, [x0] ; CHECK-NEXT ret @@ -129,7 +129,7 @@ define %struct.__neon_int16x8x3_t @ld3_8h(i16* %A) nounwind { } define %struct.__neon_int16x8x4_t @ld4_8h(i16* %A) nounwind { -; CHECK: ld4_8h +; CHECK-LABEL: ld4_8h ; Make sure we are using the operands defined by the ABI ; CHECK ld4.8h { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret @@ -146,7 +146,7 @@ declare %struct.__neon_int16x8x4_t @llvm.arm64.neon.ld4.v8i16.p0i16(i16*) nounwi %struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } define %struct.__neon_int32x2x2_t @ld2_2s(i32* %A) nounwind { -; CHECK: ld2_2s +; CHECK-LABEL: ld2_2s ; Make sure we are using the operands defined by the ABI ; CHECK ld2.2s { v0, v1 }, [x0] ; CHECK-NEXT ret @@ -155,7 +155,7 @@ define %struct.__neon_int32x2x2_t @ld2_2s(i32* %A) nounwind { } define %struct.__neon_int32x2x3_t @ld3_2s(i32* %A) nounwind { -; CHECK: ld3_2s +; CHECK-LABEL: ld3_2s ; Make sure we are using the operands defined by the ABI ; CHECK ld3.2s { v0, v1, v2 }, [x0] ; CHECK-NEXT ret @@ -164,7 +164,7 @@ define %struct.__neon_int32x2x3_t @ld3_2s(i32* %A) nounwind { } define %struct.__neon_int32x2x4_t @ld4_2s(i32* %A) nounwind { -; CHECK: ld4_2s +; CHECK-LABEL: ld4_2s ; Make sure we are using the operands defined by the ABI ; CHECK ld4.2s { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret @@ -181,7 +181,7 @@ declare %struct.__neon_int32x2x4_t @llvm.arm64.neon.ld4.v2i32.p0i32(i32*) nounwi %struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } define %struct.__neon_int32x4x2_t @ld2_4s(i32* %A) nounwind { -; CHECK: ld2_4s +; CHECK-LABEL: ld2_4s ; Make sure we are using the operands defined by the ABI ; CHECK ld2.4s { v0, v1 }, [x0] ; CHECK-NEXT ret @@ -190,7 +190,7 @@ define %struct.__neon_int32x4x2_t @ld2_4s(i32* %A) nounwind { } define %struct.__neon_int32x4x3_t @ld3_4s(i32* %A) nounwind { -; CHECK: ld3_4s +; CHECK-LABEL: ld3_4s ; Make sure we are using the operands defined by the ABI ; CHECK ld3.4s { v0, v1, v2 }, [x0] ; CHECK-NEXT ret @@ -199,7 +199,7 @@ define %struct.__neon_int32x4x3_t @ld3_4s(i32* %A) nounwind { } define %struct.__neon_int32x4x4_t @ld4_4s(i32* %A) nounwind { -; CHECK: ld4_4s +; CHECK-LABEL: ld4_4s ; Make sure we are using the operands defined by the ABI ; CHECK ld4.4s { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret @@ -216,7 +216,7 @@ declare %struct.__neon_int32x4x4_t @llvm.arm64.neon.ld4.v4i32.p0i32(i32*) nounwi %struct.__neon_int64x2x4_t = type { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } define %struct.__neon_int64x2x2_t @ld2_2d(i64* %A) nounwind { -; CHECK: ld2_2d +; CHECK-LABEL: ld2_2d ; Make sure we are using the operands defined by the ABI ; CHECK ld2.2d { v0, v1 }, [x0] ; CHECK-NEXT ret @@ -225,7 +225,7 @@ define %struct.__neon_int64x2x2_t @ld2_2d(i64* %A) nounwind { } define %struct.__neon_int64x2x3_t @ld3_2d(i64* %A) nounwind { -; CHECK: ld3_2d +; CHECK-LABEL: ld3_2d ; Make sure we are using the operands defined by the ABI ; CHECK ld3.2d { v0, v1, v2 }, [x0] ; CHECK-NEXT ret @@ -234,7 +234,7 @@ define %struct.__neon_int64x2x3_t @ld3_2d(i64* %A) nounwind { } define %struct.__neon_int64x2x4_t @ld4_2d(i64* %A) nounwind { -; CHECK: ld4_2d +; CHECK-LABEL: ld4_2d ; Make sure we are using the operands defined by the ABI ; CHECK ld4.2d { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret @@ -252,7 +252,7 @@ declare %struct.__neon_int64x2x4_t @llvm.arm64.neon.ld4.v2i64.p0i64(i64*) nounwi define %struct.__neon_int64x1x2_t @ld2_1di64(i64* %A) nounwind { -; CHECK: ld2_1di64 +; CHECK-LABEL: ld2_1di64 ; Make sure we are using the operands defined by the ABI ; CHECK ld1.1d { v0, v1 }, [x0] ; CHECK-NEXT ret @@ -261,7 +261,7 @@ define %struct.__neon_int64x1x2_t @ld2_1di64(i64* %A) nounwind { } define %struct.__neon_int64x1x3_t @ld3_1di64(i64* %A) nounwind { -; CHECK: ld3_1di64 +; CHECK-LABEL: ld3_1di64 ; Make sure we are using the operands defined by the ABI ; CHECK ld1.1d { v0, v1, v2 }, [x0] ; CHECK-NEXT ret @@ -270,7 +270,7 @@ define %struct.__neon_int64x1x3_t @ld3_1di64(i64* %A) nounwind { } define %struct.__neon_int64x1x4_t @ld4_1di64(i64* %A) nounwind { -; CHECK: ld4_1di64 +; CHECK-LABEL: ld4_1di64 ; Make sure we are using the operands defined by the ABI ; CHECK ld1.1d { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret @@ -289,7 +289,7 @@ declare %struct.__neon_int64x1x4_t @llvm.arm64.neon.ld4.v1i64.p0i64(i64*) nounwi define %struct.__neon_float64x1x2_t @ld2_1df64(double* %A) nounwind { -; CHECK: ld2_1df64 +; CHECK-LABEL: ld2_1df64 ; Make sure we are using the operands defined by the ABI ; CHECK ld1.1d { v0, v1 }, [x0] ; CHECK-NEXT ret @@ -298,7 +298,7 @@ define %struct.__neon_float64x1x2_t @ld2_1df64(double* %A) nounwind { } define %struct.__neon_float64x1x3_t @ld3_1df64(double* %A) nounwind { -; CHECK: ld3_1df64 +; CHECK-LABEL: ld3_1df64 ; Make sure we are using the operands defined by the ABI ; CHECK ld1.1d { v0, v1, v2 }, [x0] ; CHECK-NEXT ret @@ -307,7 +307,7 @@ define %struct.__neon_float64x1x3_t @ld3_1df64(double* %A) nounwind { } define %struct.__neon_float64x1x4_t @ld4_1df64(double* %A) nounwind { -; CHECK: ld4_1df64 +; CHECK-LABEL: ld4_1df64 ; Make sure we are using the operands defined by the ABI ; CHECK ld1.1d { v0, v1, v2, v3 }, [x0] ; CHECK-NEXT ret @@ -800,7 +800,7 @@ declare %struct.__neon_int64x2x3_t @llvm.arm64.neon.ld3r.v2i64.p0i64(i64*) nounw declare %struct.__neon_int64x2x4_t @llvm.arm64.neon.ld4r.v2i64.p0i64(i64*) nounwind readonly define <16 x i8> @ld1_16b(<16 x i8> %V, i8* %bar) { -; CHECK: ld1_16b +; CHECK-LABEL: ld1_16b ; Make sure we are using the operands defined by the ABI ; CHECK: ld1.b { v0 }[0], [x0] ; CHECK-NEXT ret @@ -810,7 +810,7 @@ define <16 x i8> @ld1_16b(<16 x i8> %V, i8* %bar) { } define <8 x i16> @ld1_8h(<8 x i16> %V, i16* %bar) { -; CHECK: ld1_8h +; CHECK-LABEL: ld1_8h ; Make sure we are using the operands defined by the ABI ; CHECK: ld1.h { v0 }[0], [x0] ; CHECK-NEXT ret @@ -820,7 +820,7 @@ define <8 x i16> @ld1_8h(<8 x i16> %V, i16* %bar) { } define <4 x i32> @ld1_4s(<4 x i32> %V, i32* %bar) { -; CHECK: ld1_4s +; CHECK-LABEL: ld1_4s ; Make sure we are using the operands defined by the ABI ; CHECK: ld1.s { v0 }[0], [x0] ; CHECK-NEXT ret @@ -829,8 +829,18 @@ define <4 x i32> @ld1_4s(<4 x i32> %V, i32* %bar) { ret <4 x i32> %tmp2 } +define <4 x float> @ld1_4s_float(<4 x float> %V, float* %bar) { +; CHECK-LABEL: ld1_4s_float: +; Make sure we are using the operands defined by the ABI +; CHECK: ld1.s { v0 }[0], [x0] +; CHECK-NEXT ret + %tmp1 = load float* %bar + %tmp2 = insertelement <4 x float> %V, float %tmp1, i32 0 + ret <4 x float> %tmp2 +} + define <2 x i64> @ld1_2d(<2 x i64> %V, i64* %bar) { -; CHECK: ld1_2d +; CHECK-LABEL: ld1_2d ; Make sure we are using the operands defined by the ABI ; CHECK: ld1.d { v0 }[0], [x0] ; CHECK-NEXT ret @@ -839,8 +849,18 @@ define <2 x i64> @ld1_2d(<2 x i64> %V, i64* %bar) { ret <2 x i64> %tmp2 } +define <2 x double> @ld1_2d_double(<2 x double> %V, double* %bar) { +; CHECK-LABEL: ld1_2d_double: +; Make sure we are using the operands defined by the ABI +; CHECK: ld1.d { v0 }[0], [x0] +; CHECK-NEXT ret + %tmp1 = load double* %bar + %tmp2 = insertelement <2 x double> %V, double %tmp1, i32 0 + ret <2 x double> %tmp2 +} + define <1 x i64> @ld1_1d(<1 x i64>* %p) { -; CHECK: ld1_1d +; CHECK-LABEL: ld1_1d ; Make sure we are using the operands defined by the ABI ; CHECK: ldr [[REG:d[0-9]+]], [x0] ; CHECK-NEXT: ret @@ -848,6 +868,46 @@ define <1 x i64> @ld1_1d(<1 x i64>* %p) { ret <1 x i64> %tmp } +define <8 x i8> @ld1_8b(<8 x i8> %V, i8* %bar) { +; CHECK-LABEL: ld1_8b +; Make sure we are using the operands defined by the ABI +; CHECK: ld1.b { v0 }[0], [x0] +; CHECK-NEXT ret + %tmp1 = load i8* %bar + %tmp2 = insertelement <8 x i8> %V, i8 %tmp1, i32 0 + ret <8 x i8> %tmp2 +} + +define <4 x i16> @ld1_4h(<4 x i16> %V, i16* %bar) { +; CHECK-LABEL: ld1_4h +; Make sure we are using the operands defined by the ABI +; CHECK: ld1.h { v0 }[0], [x0] +; CHECK-NEXT ret + %tmp1 = load i16* %bar + %tmp2 = insertelement <4 x i16> %V, i16 %tmp1, i32 0 + ret <4 x i16> %tmp2 +} + +define <2 x i32> @ld1_2s(<2 x i32> %V, i32* %bar) { +; CHECK-LABEL: ld1_2s: +; Make sure we are using the operands defined by the ABI +; CHECK: ld1.s { v0 }[0], [x0] +; CHECK-NEXT ret + %tmp1 = load i32* %bar + %tmp2 = insertelement <2 x i32> %V, i32 %tmp1, i32 0 + ret <2 x i32> %tmp2 +} + +define <2 x float> @ld1_2s_float(<2 x float> %V, float* %bar) { +; CHECK-LABEL: ld1_2s_float: +; Make sure we are using the operands defined by the ABI +; CHECK: ld1.s { v0 }[0], [x0] +; CHECK-NEXT ret + %tmp1 = load float* %bar + %tmp2 = insertelement <2 x float> %V, float %tmp1, i32 0 + ret <2 x float> %tmp2 +} + ; Add rdar://13098923 test case: vld1_dup_u32 doesn't generate ld1r.2s define void @ld1r_2s_from_dup(i8* nocapture %a, i8* nocapture %b, i16* nocapture %diff) nounwind ssp { @@ -882,7 +942,7 @@ entry: ; Tests for rdar://11947069: vld1_dup_* and vld1q_dup_* code gen is suboptimal define <4 x float> @ld1r_4s_float(float* nocapture %x) { entry: -; CHECK: ld1r_4s_float +; CHECK-LABEL: ld1r_4s_float ; Make sure we are using the operands defined by the ABI ; CHECK: ld1r.4s { v0 }, [x0] ; CHECK-NEXT ret @@ -896,7 +956,7 @@ entry: define <2 x float> @ld1r_2s_float(float* nocapture %x) { entry: -; CHECK: ld1r_2s_float +; CHECK-LABEL: ld1r_2s_float ; Make sure we are using the operands defined by the ABI ; CHECK: ld1r.2s { v0 }, [x0] ; CHECK-NEXT ret @@ -908,7 +968,7 @@ entry: define <2 x double> @ld1r_2d_double(double* nocapture %x) { entry: -; CHECK: ld1r_2d_double +; CHECK-LABEL: ld1r_2d_double ; Make sure we are using the operands defined by the ABI ; CHECK: ld1r.2d { v0 }, [x0] ; CHECK-NEXT ret @@ -920,7 +980,7 @@ entry: define <1 x double> @ld1r_1d_double(double* nocapture %x) { entry: -; CHECK: ld1r_1d_double +; CHECK-LABEL: ld1r_1d_double ; Make sure we are using the operands defined by the ABI ; CHECK: ldr d0, [x0] ; CHECK-NEXT ret @@ -931,7 +991,7 @@ entry: define <4 x float> @ld1r_4s_float_shuff(float* nocapture %x) { entry: -; CHECK: ld1r_4s_float_shuff +; CHECK-LABEL: ld1r_4s_float_shuff ; Make sure we are using the operands defined by the ABI ; CHECK: ld1r.4s { v0 }, [x0] ; CHECK-NEXT ret @@ -943,7 +1003,7 @@ entry: define <2 x float> @ld1r_2s_float_shuff(float* nocapture %x) { entry: -; CHECK: ld1r_2s_float_shuff +; CHECK-LABEL: ld1r_2s_float_shuff ; Make sure we are using the operands defined by the ABI ; CHECK: ld1r.2s { v0 }, [x0] ; CHECK-NEXT ret @@ -955,7 +1015,7 @@ entry: define <2 x double> @ld1r_2d_double_shuff(double* nocapture %x) { entry: -; CHECK: ld1r_2d_double_shuff +; CHECK-LABEL: ld1r_2d_double_shuff ; Make sure we are using the operands defined by the ABI ; CHECK: ld1r.2d { v0 }, [x0] ; CHECK-NEXT ret @@ -967,7 +1027,7 @@ entry: define <1 x double> @ld1r_1d_double_shuff(double* nocapture %x) { entry: -; CHECK: ld1r_1d_double_shuff +; CHECK-LABEL: ld1r_1d_double_shuff ; Make sure we are using the operands defined by the ABI ; CHECK: ldr d0, [x0] ; CHECK-NEXT ret diff --git a/llvm/test/CodeGen/ARM64/st1.ll b/llvm/test/CodeGen/ARM64/st1.ll index 3c0d3ecc04c9..b9aafc60e7ba 100644 --- a/llvm/test/CodeGen/ARM64/st1.ll +++ b/llvm/test/CodeGen/ARM64/st1.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=arm64 -arm64-neon-syntax=apple -verify-machineinstrs | FileCheck %s define void @st1lane_16b(<16 x i8> %A, i8* %D) { -; CHECK: st1lane_16b +; CHECK-LABEL: st1lane_16b ; CHECK: st1.b %tmp = extractelement <16 x i8> %A, i32 1 store i8 %tmp, i8* %D @@ -9,7 +9,7 @@ define void @st1lane_16b(<16 x i8> %A, i8* %D) { } define void @st1lane_8h(<8 x i16> %A, i16* %D) { -; CHECK: st1lane_8h +; CHECK-LABEL: st1lane_8h ; CHECK: st1.h %tmp = extractelement <8 x i16> %A, i32 1 store i16 %tmp, i16* %D @@ -17,44 +17,92 @@ define void @st1lane_8h(<8 x i16> %A, i16* %D) { } define void @st1lane_4s(<4 x i32> %A, i32* %D) { -; CHECK: st1lane_4s +; CHECK-LABEL: st1lane_4s ; CHECK: st1.s %tmp = extractelement <4 x i32> %A, i32 1 store i32 %tmp, i32* %D ret void } +define void @st1lane_4s_float(<4 x float> %A, float* %D) { +; CHECK-LABEL: st1lane_4s_float +; CHECK: st1.s + %tmp = extractelement <4 x float> %A, i32 1 + store float %tmp, float* %D + ret void +} + define void @st1lane_2d(<2 x i64> %A, i64* %D) { -; CHECK: st1lane_2d +; CHECK-LABEL: st1lane_2d ; CHECK: st1.d %tmp = extractelement <2 x i64> %A, i32 1 store i64 %tmp, i64* %D ret void } +define void @st1lane_2d_double(<2 x double> %A, double* %D) { +; CHECK-LABEL: st1lane_2d_double +; CHECK: st1.d + %tmp = extractelement <2 x double> %A, i32 1 + store double %tmp, double* %D + ret void +} + +define void @st1lane_8b(<8 x i8> %A, i8* %D) { +; CHECK-LABEL: st1lane_8b +; CHECK: st1.b + %tmp = extractelement <8 x i8> %A, i32 1 + store i8 %tmp, i8* %D + ret void +} + +define void @st1lane_4h(<4 x i16> %A, i16* %D) { +; CHECK-LABEL: st1lane_4h +; CHECK: st1.h + %tmp = extractelement <4 x i16> %A, i32 1 + store i16 %tmp, i16* %D + ret void +} + +define void @st1lane_2s(<2 x i32> %A, i32* %D) { +; CHECK-LABEL: st1lane_2s +; CHECK: st1.s + %tmp = extractelement <2 x i32> %A, i32 1 + store i32 %tmp, i32* %D + ret void +} + +define void @st1lane_2s_float(<2 x float> %A, float* %D) { +; CHECK-LABEL: st1lane_2s_float +; CHECK: st1.s + %tmp = extractelement <2 x float> %A, i32 1 + store float %tmp, float* %D + ret void +} + define void @st2lane_16b(<16 x i8> %A, <16 x i8> %B, i8* %D) { -; CHECK: st2lane_16b +; CHECK-LABEL: st2lane_16b ; CHECK: st2.b call void @llvm.arm64.neon.st2lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i64 1, i8* %D) ret void } define void @st2lane_8h(<8 x i16> %A, <8 x i16> %B, i16* %D) { -; CHECK: st2lane_8h +; CHECK-LABEL: st2lane_8h ; CHECK: st2.h call void @llvm.arm64.neon.st2lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i64 1, i16* %D) ret void } define void @st2lane_4s(<4 x i32> %A, <4 x i32> %B, i32* %D) { -; CHECK: st2lane_4s +; CHECK-LABEL: st2lane_4s ; CHECK: st2.s call void @llvm.arm64.neon.st2lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i64 1, i32* %D) ret void } define void @st2lane_2d(<2 x i64> %A, <2 x i64> %B, i64* %D) { -; CHECK: st2lane_2d +; CHECK-LABEL: st2lane_2d ; CHECK: st2.d call void @llvm.arm64.neon.st2lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64 1, i64* %D) ret void @@ -66,28 +114,28 @@ declare void @llvm.arm64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32 declare void @llvm.arm64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readnone define void @st3lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %D) { -; CHECK: st3lane_16b +; CHECK-LABEL: st3lane_16b ; CHECK: st3.b call void @llvm.arm64.neon.st3lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i64 1, i8* %D) ret void } define void @st3lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %D) { -; CHECK: st3lane_8h +; CHECK-LABEL: st3lane_8h ; CHECK: st3.h call void @llvm.arm64.neon.st3lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i64 1, i16* %D) ret void } define void @st3lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %D) { -; CHECK: st3lane_4s +; CHECK-LABEL: st3lane_4s ; CHECK: st3.s call void @llvm.arm64.neon.st3lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i64 1, i32* %D) ret void } define void @st3lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %D) { -; CHECK: st3lane_2d +; CHECK-LABEL: st3lane_2d ; CHECK: st3.d call void @llvm.arm64.neon.st3lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64 1, i64* %D) ret void @@ -99,28 +147,28 @@ declare void @llvm.arm64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32 declare void @llvm.arm64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readnone define void @st4lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %E) { -; CHECK: st4lane_16b +; CHECK-LABEL: st4lane_16b ; CHECK: st4.b call void @llvm.arm64.neon.st4lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 1, i8* %E) ret void } define void @st4lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %E) { -; CHECK: st4lane_8h +; CHECK-LABEL: st4lane_8h ; CHECK: st4.h call void @llvm.arm64.neon.st4lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 1, i16* %E) ret void } define void @st4lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %E) { -; CHECK: st4lane_4s +; CHECK-LABEL: st4lane_4s ; CHECK: st4.s call void @llvm.arm64.neon.st4lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 1, i32* %E) ret void } define void @st4lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %E) { -; CHECK: st4lane_2d +; CHECK-LABEL: st4lane_2d ; CHECK: st4.d call void @llvm.arm64.neon.st4lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 1, i64* %E) ret void @@ -133,21 +181,21 @@ declare void @llvm.arm64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64 define void @st2_8b(<8 x i8> %A, <8 x i8> %B, i8* %P) nounwind { -; CHECK: st2_8b +; CHECK-LABEL: st2_8b ; CHECK st2.8b call void @llvm.arm64.neon.st2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %P) ret void } define void @st3_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P) nounwind { -; CHECK: st3_8b +; CHECK-LABEL: st3_8b ; CHECK st3.8b call void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P) ret void } define void @st4_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P) nounwind { -; CHECK: st4_8b +; CHECK-LABEL: st4_8b ; CHECK st4.8b call void @llvm.arm64.neon.st4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P) ret void @@ -158,21 +206,21 @@ declare void @llvm.arm64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) n declare void @llvm.arm64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind readonly define void @st2_16b(<16 x i8> %A, <16 x i8> %B, i8* %P) nounwind { -; CHECK: st2_16b +; CHECK-LABEL: st2_16b ; CHECK st2.16b call void @llvm.arm64.neon.st2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %P) ret void } define void @st3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P) nounwind { -; CHECK: st3_16b +; CHECK-LABEL: st3_16b ; CHECK st3.16b call void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P) ret void } define void @st4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P) nounwind { -; CHECK: st4_16b +; CHECK-LABEL: st4_16b ; CHECK st4.16b call void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P) ret void @@ -183,21 +231,21 @@ declare void @llvm.arm64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8 declare void @llvm.arm64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind readonly define void @st2_4h(<4 x i16> %A, <4 x i16> %B, i16* %P) nounwind { -; CHECK: st2_4h +; CHECK-LABEL: st2_4h ; CHECK st2.4h call void @llvm.arm64.neon.st2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %P) ret void } define void @st3_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P) nounwind { -; CHECK: st3_4h +; CHECK-LABEL: st3_4h ; CHECK st3.4h call void @llvm.arm64.neon.st3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P) ret void } define void @st4_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P) nounwind { -; CHECK: st4_4h +; CHECK-LABEL: st4_4h ; CHECK st4.4h call void @llvm.arm64.neon.st4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P) ret void @@ -208,21 +256,21 @@ declare void @llvm.arm64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i declare void @llvm.arm64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind readonly define void @st2_8h(<8 x i16> %A, <8 x i16> %B, i16* %P) nounwind { -; CHECK: st2_8h +; CHECK-LABEL: st2_8h ; CHECK st2.8h call void @llvm.arm64.neon.st2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %P) ret void } define void @st3_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P) nounwind { -; CHECK: st3_8h +; CHECK-LABEL: st3_8h ; CHECK st3.8h call void @llvm.arm64.neon.st3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P) ret void } define void @st4_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P) nounwind { -; CHECK: st4_8h +; CHECK-LABEL: st4_8h ; CHECK st4.8h call void @llvm.arm64.neon.st4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P) ret void @@ -233,21 +281,21 @@ declare void @llvm.arm64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i declare void @llvm.arm64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind readonly define void @st2_2s(<2 x i32> %A, <2 x i32> %B, i32* %P) nounwind { -; CHECK: st2_2s +; CHECK-LABEL: st2_2s ; CHECK st2.2s call void @llvm.arm64.neon.st2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %P) ret void } define void @st3_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P) nounwind { -; CHECK: st3_2s +; CHECK-LABEL: st3_2s ; CHECK st3.2s call void @llvm.arm64.neon.st3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P) ret void } define void @st4_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P) nounwind { -; CHECK: st4_2s +; CHECK-LABEL: st4_2s ; CHECK st4.2s call void @llvm.arm64.neon.st4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P) ret void @@ -258,21 +306,21 @@ declare void @llvm.arm64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i declare void @llvm.arm64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind readonly define void @st2_4s(<4 x i32> %A, <4 x i32> %B, i32* %P) nounwind { -; CHECK: st2_4s +; CHECK-LABEL: st2_4s ; CHECK st2.4s call void @llvm.arm64.neon.st2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %P) ret void } define void @st3_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P) nounwind { -; CHECK: st3_4s +; CHECK-LABEL: st3_4s ; CHECK st3.4s call void @llvm.arm64.neon.st3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P) ret void } define void @st4_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P) nounwind { -; CHECK: st4_4s +; CHECK-LABEL: st4_4s ; CHECK st4.4s call void @llvm.arm64.neon.st4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P) ret void @@ -283,21 +331,21 @@ declare void @llvm.arm64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i declare void @llvm.arm64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind readonly define void @st2_1d(<1 x i64> %A, <1 x i64> %B, i64* %P) nounwind { -; CHECK: st2_1d +; CHECK-LABEL: st2_1d ; CHECK st1.2d call void @llvm.arm64.neon.st2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %P) ret void } define void @st3_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P) nounwind { -; CHECK: st3_1d +; CHECK-LABEL: st3_1d ; CHECK st1.3d call void @llvm.arm64.neon.st3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P) ret void } define void @st4_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P) nounwind { -; CHECK: st4_1d +; CHECK-LABEL: st4_1d ; CHECK st1.4d call void @llvm.arm64.neon.st4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P) ret void @@ -308,21 +356,21 @@ declare void @llvm.arm64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i declare void @llvm.arm64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind readonly define void @st2_2d(<2 x i64> %A, <2 x i64> %B, i64* %P) nounwind { -; CHECK: st2_2d +; CHECK-LABEL: st2_2d ; CHECK st2.2d call void @llvm.arm64.neon.st2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %P) ret void } define void @st3_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P) nounwind { -; CHECK: st3_2d +; CHECK-LABEL: st3_2d ; CHECK st2.3d call void @llvm.arm64.neon.st3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P) ret void } define void @st4_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P) nounwind { -; CHECK: st4_2d +; CHECK-LABEL: st4_2d ; CHECK st2.4d call void @llvm.arm64.neon.st4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P) ret void