[CodeGen] Update min-legal-vector-width based on function argument and return types

This is a continuation of my patches to inform the X86 backend of the largest IR vector types used in each function, so that we can restrict the backend type legalizer and prevent 512-bit vectors on SKX when -mprefer-vector-width=256 is specified and the user did not explicitly use any 512-bit vectors.

This patch updates the tracked vector width based on the argument and return types of the current function, and on those of any functions it calls. This is intended to make sure the backend type legalizer doesn't disturb any types that are required for the ABI.
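
For illustration only (not part of this commit), a minimal C++ sketch of the ABI situation being protected; the type and function names are hypothetical, and clang's vector_size extension is assumed:

// Built with -mavx512f -mprefer-vector-width=256, the x86-64 vector ABI still
// passes a, b, and the return value of this signature in 512-bit ZMM
// registers, so CodeGen must record "min-legal-vector-width"="512" to keep
// the backend type legalizer from splitting these types.
typedef float f32x16 __attribute__((vector_size(64))); // 64 bytes = 512 bits

f32x16 add512(f32x16 a, f32x16 b) {
  return a + b;
}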

Differential Revision: https://reviews.llvm.org/D52441

llvm-svn: 345168
Craig Topper 2018-10-24 17:42:17 +00:00
parent 618c0bc363
commit 3113ec3dc7
17 changed files with 723 additions and 591 deletions


@@ -4240,6 +4240,13 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
   }
 #endif
+  // Update the largest vector width if any arguments have vector types.
+  for (unsigned i = 0; i < IRCallArgs.size(); ++i) {
+    if (auto *VT = dyn_cast<llvm::VectorType>(IRCallArgs[i]->getType()))
+      LargestVectorWidth = std::max(LargestVectorWidth,
+                                    VT->getPrimitiveSizeInBits());
+  }
   // Compute the calling convention and attributes.
   unsigned CallingConv;
   llvm::AttributeList Attrs;
@@ -4320,6 +4327,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
   if (!CI->getType()->isVoidTy())
     CI->setName("call");
+  // Update largest vector width from the return type.
+  if (auto *VT = dyn_cast<llvm::VectorType>(CI->getType()))
+    LargestVectorWidth = std::max(LargestVectorWidth,
+                                  VT->getPrimitiveSizeInBits());
+
   // Insert instrumentation or attach profile metadata at indirect call sites.
   // For more details, see the comment before the definition of
   // IPVK_IndirectCallTarget in InstrProfData.inc.
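
The same max-fold is applied once per IR call argument and once to the call result. As a standalone sketch (the helper name is hypothetical, and the 2018-era API where getPrimitiveSizeInBits() returns unsigned is assumed):

#include "llvm/IR/DerivedTypes.h" // llvm::VectorType
#include "llvm/Support/Casting.h" // llvm::dyn_cast
#include <algorithm>              // std::max

// Fold one value's vector width, if any, into the running maximum that
// eventually becomes the "min-legal-vector-width" attribute.
static unsigned foldVectorWidth(unsigned LargestVectorWidth, llvm::Type *Ty) {
  if (auto *VT = llvm::dyn_cast<llvm::VectorType>(Ty))
    LargestVectorWidth = std::max(LargestVectorWidth,
                                  (unsigned)VT->getPrimitiveSizeInBits());
  return LargestVectorWidth;
}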


@@ -430,7 +430,24 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
     NormalCleanupDest = Address::invalid();
   }
-  // Add the required-vector-width attribute.
+  // Scan function arguments for vector width.
+  for (llvm::Argument &A : CurFn->args())
+    if (auto *VT = dyn_cast<llvm::VectorType>(A.getType()))
+      LargestVectorWidth = std::max(LargestVectorWidth,
+                                    VT->getPrimitiveSizeInBits());
+
+  // Update vector width based on return type.
+  if (auto *VT = dyn_cast<llvm::VectorType>(CurFn->getReturnType()))
+    LargestVectorWidth = std::max(LargestVectorWidth,
+                                  VT->getPrimitiveSizeInBits());
+
+  // Add the required-vector-width attribute. This contains the max width from:
+  // 1. min-vector-width attribute used in the source program.
+  // 2. Any builtins used that have a vector width specified.
+  // 3. Values passed in and out of inline assembly.
+  // 4. Width of vector arguments and return types for this function.
+  // 5. Width of vector arguments and return types for functions called by
+  //    this function.
   if (LargestVectorWidth != 0)
     CurFn->addFnAttr("min-legal-vector-width",
                      llvm::utostr(LargestVectorWidth));
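
For context, a consumer such as the X86 backend can later read the recorded width back off the function. A sketch of that read-back under the same API assumptions (the helper name is hypothetical):

#include "llvm/IR/Function.h"

// Recover the width CodeGen recorded; 0 means no explicit requirement.
static unsigned getMinLegalVectorWidth(const llvm::Function &F) {
  unsigned Width = 0;
  if (F.hasFnAttribute("min-legal-vector-width"))
    F.getFnAttribute("min-legal-vector-width")
        .getValueAsString()
        .getAsInteger(/*Radix=*/10, Width);
  return Width;
}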


@@ -11,7 +11,7 @@ int8x8_t test_vand_s8(int8x8_t a, int8x8_t b) {
return vand_s8(a, b);
}
// CHECK-LABEL: define <16 x i8> @test_vandq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK-LABEL: define <16 x i8> @test_vandq_s8(<16 x i8> %a, <16 x i8> %b) #1 {
// CHECK: [[AND_I:%.*]] = and <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[AND_I]]
int8x16_t test_vandq_s8(int8x16_t a, int8x16_t b) {
@@ -25,7 +25,7 @@ int16x4_t test_vand_s16(int16x4_t a, int16x4_t b) {
return vand_s16(a, b);
}
// CHECK-LABEL: define <8 x i16> @test_vandq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK-LABEL: define <8 x i16> @test_vandq_s16(<8 x i16> %a, <8 x i16> %b) #1 {
// CHECK: [[AND_I:%.*]] = and <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[AND_I]]
int16x8_t test_vandq_s16(int16x8_t a, int16x8_t b) {
@@ -39,7 +39,7 @@ int32x2_t test_vand_s32(int32x2_t a, int32x2_t b) {
return vand_s32(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_vandq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK-LABEL: define <4 x i32> @test_vandq_s32(<4 x i32> %a, <4 x i32> %b) #1 {
// CHECK: [[AND_I:%.*]] = and <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[AND_I]]
int32x4_t test_vandq_s32(int32x4_t a, int32x4_t b) {
@@ -53,7 +53,7 @@ int64x1_t test_vand_s64(int64x1_t a, int64x1_t b) {
return vand_s64(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vandq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK-LABEL: define <2 x i64> @test_vandq_s64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[AND_I:%.*]] = and <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[AND_I]]
int64x2_t test_vandq_s64(int64x2_t a, int64x2_t b) {
@@ -67,7 +67,7 @@ uint8x8_t test_vand_u8(uint8x8_t a, uint8x8_t b) {
return vand_u8(a, b);
}
// CHECK-LABEL: define <16 x i8> @test_vandq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK-LABEL: define <16 x i8> @test_vandq_u8(<16 x i8> %a, <16 x i8> %b) #1 {
// CHECK: [[AND_I:%.*]] = and <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[AND_I]]
uint8x16_t test_vandq_u8(uint8x16_t a, uint8x16_t b) {
@@ -81,7 +81,7 @@ uint16x4_t test_vand_u16(uint16x4_t a, uint16x4_t b) {
return vand_u16(a, b);
}
// CHECK-LABEL: define <8 x i16> @test_vandq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK-LABEL: define <8 x i16> @test_vandq_u16(<8 x i16> %a, <8 x i16> %b) #1 {
// CHECK: [[AND_I:%.*]] = and <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[AND_I]]
uint16x8_t test_vandq_u16(uint16x8_t a, uint16x8_t b) {
@@ -95,7 +95,7 @@ uint32x2_t test_vand_u32(uint32x2_t a, uint32x2_t b) {
return vand_u32(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_vandq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK-LABEL: define <4 x i32> @test_vandq_u32(<4 x i32> %a, <4 x i32> %b) #1 {
// CHECK: [[AND_I:%.*]] = and <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[AND_I]]
uint32x4_t test_vandq_u32(uint32x4_t a, uint32x4_t b) {
@@ -109,7 +109,7 @@ uint64x1_t test_vand_u64(uint64x1_t a, uint64x1_t b) {
return vand_u64(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vandq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK-LABEL: define <2 x i64> @test_vandq_u64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[AND_I:%.*]] = and <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[AND_I]]
uint64x2_t test_vandq_u64(uint64x2_t a, uint64x2_t b) {
@@ -123,7 +123,7 @@ int8x8_t test_vorr_s8(int8x8_t a, int8x8_t b) {
return vorr_s8(a, b);
}
// CHECK-LABEL: define <16 x i8> @test_vorrq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK-LABEL: define <16 x i8> @test_vorrq_s8(<16 x i8> %a, <16 x i8> %b) #1 {
// CHECK: [[OR_I:%.*]] = or <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[OR_I]]
int8x16_t test_vorrq_s8(int8x16_t a, int8x16_t b) {
@@ -137,7 +137,7 @@ int16x4_t test_vorr_s16(int16x4_t a, int16x4_t b) {
return vorr_s16(a, b);
}
// CHECK-LABEL: define <8 x i16> @test_vorrq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK-LABEL: define <8 x i16> @test_vorrq_s16(<8 x i16> %a, <8 x i16> %b) #1 {
// CHECK: [[OR_I:%.*]] = or <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[OR_I]]
int16x8_t test_vorrq_s16(int16x8_t a, int16x8_t b) {
@@ -151,7 +151,7 @@ int32x2_t test_vorr_s32(int32x2_t a, int32x2_t b) {
return vorr_s32(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_vorrq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK-LABEL: define <4 x i32> @test_vorrq_s32(<4 x i32> %a, <4 x i32> %b) #1 {
// CHECK: [[OR_I:%.*]] = or <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[OR_I]]
int32x4_t test_vorrq_s32(int32x4_t a, int32x4_t b) {
@@ -165,7 +165,7 @@ int64x1_t test_vorr_s64(int64x1_t a, int64x1_t b) {
return vorr_s64(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vorrq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK-LABEL: define <2 x i64> @test_vorrq_s64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[OR_I:%.*]] = or <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[OR_I]]
int64x2_t test_vorrq_s64(int64x2_t a, int64x2_t b) {
@@ -179,7 +179,7 @@ uint8x8_t test_vorr_u8(uint8x8_t a, uint8x8_t b) {
return vorr_u8(a, b);
}
// CHECK-LABEL: define <16 x i8> @test_vorrq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK-LABEL: define <16 x i8> @test_vorrq_u8(<16 x i8> %a, <16 x i8> %b) #1 {
// CHECK: [[OR_I:%.*]] = or <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[OR_I]]
uint8x16_t test_vorrq_u8(uint8x16_t a, uint8x16_t b) {
@@ -193,7 +193,7 @@ uint16x4_t test_vorr_u16(uint16x4_t a, uint16x4_t b) {
return vorr_u16(a, b);
}
// CHECK-LABEL: define <8 x i16> @test_vorrq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK-LABEL: define <8 x i16> @test_vorrq_u16(<8 x i16> %a, <8 x i16> %b) #1 {
// CHECK: [[OR_I:%.*]] = or <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[OR_I]]
uint16x8_t test_vorrq_u16(uint16x8_t a, uint16x8_t b) {
@@ -207,7 +207,7 @@ uint32x2_t test_vorr_u32(uint32x2_t a, uint32x2_t b) {
return vorr_u32(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_vorrq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK-LABEL: define <4 x i32> @test_vorrq_u32(<4 x i32> %a, <4 x i32> %b) #1 {
// CHECK: [[OR_I:%.*]] = or <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[OR_I]]
uint32x4_t test_vorrq_u32(uint32x4_t a, uint32x4_t b) {
@@ -221,7 +221,7 @@ uint64x1_t test_vorr_u64(uint64x1_t a, uint64x1_t b) {
return vorr_u64(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vorrq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK-LABEL: define <2 x i64> @test_vorrq_u64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[OR_I:%.*]] = or <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[OR_I]]
uint64x2_t test_vorrq_u64(uint64x2_t a, uint64x2_t b) {
@@ -235,7 +235,7 @@ int8x8_t test_veor_s8(int8x8_t a, int8x8_t b) {
return veor_s8(a, b);
}
// CHECK-LABEL: define <16 x i8> @test_veorq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK-LABEL: define <16 x i8> @test_veorq_s8(<16 x i8> %a, <16 x i8> %b) #1 {
// CHECK: [[XOR_I:%.*]] = xor <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[XOR_I]]
int8x16_t test_veorq_s8(int8x16_t a, int8x16_t b) {
@@ -249,7 +249,7 @@ int16x4_t test_veor_s16(int16x4_t a, int16x4_t b) {
return veor_s16(a, b);
}
// CHECK-LABEL: define <8 x i16> @test_veorq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK-LABEL: define <8 x i16> @test_veorq_s16(<8 x i16> %a, <8 x i16> %b) #1 {
// CHECK: [[XOR_I:%.*]] = xor <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[XOR_I]]
int16x8_t test_veorq_s16(int16x8_t a, int16x8_t b) {
@@ -263,7 +263,7 @@ int32x2_t test_veor_s32(int32x2_t a, int32x2_t b) {
return veor_s32(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_veorq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK-LABEL: define <4 x i32> @test_veorq_s32(<4 x i32> %a, <4 x i32> %b) #1 {
// CHECK: [[XOR_I:%.*]] = xor <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[XOR_I]]
int32x4_t test_veorq_s32(int32x4_t a, int32x4_t b) {
@@ -277,7 +277,7 @@ int64x1_t test_veor_s64(int64x1_t a, int64x1_t b) {
return veor_s64(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_veorq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK-LABEL: define <2 x i64> @test_veorq_s64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[XOR_I:%.*]] = xor <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[XOR_I]]
int64x2_t test_veorq_s64(int64x2_t a, int64x2_t b) {
@@ -291,7 +291,7 @@ uint8x8_t test_veor_u8(uint8x8_t a, uint8x8_t b) {
return veor_u8(a, b);
}
// CHECK-LABEL: define <16 x i8> @test_veorq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK-LABEL: define <16 x i8> @test_veorq_u8(<16 x i8> %a, <16 x i8> %b) #1 {
// CHECK: [[XOR_I:%.*]] = xor <16 x i8> %a, %b
// CHECK: ret <16 x i8> [[XOR_I]]
uint8x16_t test_veorq_u8(uint8x16_t a, uint8x16_t b) {
@@ -305,7 +305,7 @@ uint16x4_t test_veor_u16(uint16x4_t a, uint16x4_t b) {
return veor_u16(a, b);
}
// CHECK-LABEL: define <8 x i16> @test_veorq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK-LABEL: define <8 x i16> @test_veorq_u16(<8 x i16> %a, <8 x i16> %b) #1 {
// CHECK: [[XOR_I:%.*]] = xor <8 x i16> %a, %b
// CHECK: ret <8 x i16> [[XOR_I]]
uint16x8_t test_veorq_u16(uint16x8_t a, uint16x8_t b) {
@@ -319,7 +319,7 @@ uint32x2_t test_veor_u32(uint32x2_t a, uint32x2_t b) {
return veor_u32(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_veorq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK-LABEL: define <4 x i32> @test_veorq_u32(<4 x i32> %a, <4 x i32> %b) #1 {
// CHECK: [[XOR_I:%.*]] = xor <4 x i32> %a, %b
// CHECK: ret <4 x i32> [[XOR_I]]
uint32x4_t test_veorq_u32(uint32x4_t a, uint32x4_t b) {
@@ -333,7 +333,7 @@ uint64x1_t test_veor_u64(uint64x1_t a, uint64x1_t b) {
return veor_u64(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_veorq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK-LABEL: define <2 x i64> @test_veorq_u64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[XOR_I:%.*]] = xor <2 x i64> %a, %b
// CHECK: ret <2 x i64> [[XOR_I]]
uint64x2_t test_veorq_u64(uint64x2_t a, uint64x2_t b) {
@@ -348,7 +348,7 @@ int8x8_t test_vbic_s8(int8x8_t a, int8x8_t b) {
return vbic_s8(a, b);
}
// CHECK-LABEL: define <16 x i8> @test_vbicq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK-LABEL: define <16 x i8> @test_vbicq_s8(<16 x i8> %a, <16 x i8> %b) #1 {
// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]]
// CHECK: ret <16 x i8> [[AND_I]]
@@ -364,7 +364,7 @@ int16x4_t test_vbic_s16(int16x4_t a, int16x4_t b) {
return vbic_s16(a, b);
}
// CHECK-LABEL: define <8 x i16> @test_vbicq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK-LABEL: define <8 x i16> @test_vbicq_s16(<8 x i16> %a, <8 x i16> %b) #1 {
// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]]
// CHECK: ret <8 x i16> [[AND_I]]
@@ -380,7 +380,7 @@ int32x2_t test_vbic_s32(int32x2_t a, int32x2_t b) {
return vbic_s32(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_vbicq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK-LABEL: define <4 x i32> @test_vbicq_s32(<4 x i32> %a, <4 x i32> %b) #1 {
// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]]
// CHECK: ret <4 x i32> [[AND_I]]
@@ -396,7 +396,7 @@ int64x1_t test_vbic_s64(int64x1_t a, int64x1_t b) {
return vbic_s64(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vbicq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK-LABEL: define <2 x i64> @test_vbicq_s64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
// CHECK: [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]]
// CHECK: ret <2 x i64> [[AND_I]]
@@ -412,7 +412,7 @@ uint8x8_t test_vbic_u8(uint8x8_t a, uint8x8_t b) {
return vbic_u8(a, b);
}
// CHECK-LABEL: define <16 x i8> @test_vbicq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK-LABEL: define <16 x i8> @test_vbicq_u8(<16 x i8> %a, <16 x i8> %b) #1 {
// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]]
// CHECK: ret <16 x i8> [[AND_I]]
@@ -428,7 +428,7 @@ uint16x4_t test_vbic_u16(uint16x4_t a, uint16x4_t b) {
return vbic_u16(a, b);
}
// CHECK-LABEL: define <8 x i16> @test_vbicq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK-LABEL: define <8 x i16> @test_vbicq_u16(<8 x i16> %a, <8 x i16> %b) #1 {
// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]]
// CHECK: ret <8 x i16> [[AND_I]]
@@ -444,7 +444,7 @@ uint32x2_t test_vbic_u32(uint32x2_t a, uint32x2_t b) {
return vbic_u32(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_vbicq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK-LABEL: define <4 x i32> @test_vbicq_u32(<4 x i32> %a, <4 x i32> %b) #1 {
// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]]
// CHECK: ret <4 x i32> [[AND_I]]
@@ -460,7 +460,7 @@ uint64x1_t test_vbic_u64(uint64x1_t a, uint64x1_t b) {
return vbic_u64(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vbicq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK-LABEL: define <2 x i64> @test_vbicq_u64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
// CHECK: [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]]
// CHECK: ret <2 x i64> [[AND_I]]
@@ -476,7 +476,7 @@ int8x8_t test_vorn_s8(int8x8_t a, int8x8_t b) {
return vorn_s8(a, b);
}
// CHECK-LABEL: define <16 x i8> @test_vornq_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK-LABEL: define <16 x i8> @test_vornq_s8(<16 x i8> %a, <16 x i8> %b) #1 {
// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]]
// CHECK: ret <16 x i8> [[OR_I]]
@@ -492,7 +492,7 @@ int16x4_t test_vorn_s16(int16x4_t a, int16x4_t b) {
return vorn_s16(a, b);
}
// CHECK-LABEL: define <8 x i16> @test_vornq_s16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK-LABEL: define <8 x i16> @test_vornq_s16(<8 x i16> %a, <8 x i16> %b) #1 {
// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]]
// CHECK: ret <8 x i16> [[OR_I]]
@@ -508,7 +508,7 @@ int32x2_t test_vorn_s32(int32x2_t a, int32x2_t b) {
return vorn_s32(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_vornq_s32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK-LABEL: define <4 x i32> @test_vornq_s32(<4 x i32> %a, <4 x i32> %b) #1 {
// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]]
// CHECK: ret <4 x i32> [[OR_I]]
@@ -524,7 +524,7 @@ int64x1_t test_vorn_s64(int64x1_t a, int64x1_t b) {
return vorn_s64(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vornq_s64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK-LABEL: define <2 x i64> @test_vornq_s64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
// CHECK: [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]]
// CHECK: ret <2 x i64> [[OR_I]]
@@ -540,7 +540,7 @@ uint8x8_t test_vorn_u8(uint8x8_t a, uint8x8_t b) {
return vorn_u8(a, b);
}
// CHECK-LABEL: define <16 x i8> @test_vornq_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK-LABEL: define <16 x i8> @test_vornq_u8(<16 x i8> %a, <16 x i8> %b) #1 {
// CHECK: [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK: [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]]
// CHECK: ret <16 x i8> [[OR_I]]
@@ -556,7 +556,7 @@ uint16x4_t test_vorn_u16(uint16x4_t a, uint16x4_t b) {
return vorn_u16(a, b);
}
// CHECK-LABEL: define <8 x i16> @test_vornq_u16(<8 x i16> %a, <8 x i16> %b) #0 {
// CHECK-LABEL: define <8 x i16> @test_vornq_u16(<8 x i16> %a, <8 x i16> %b) #1 {
// CHECK: [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK: [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]]
// CHECK: ret <8 x i16> [[OR_I]]
@@ -572,7 +572,7 @@ uint32x2_t test_vorn_u32(uint32x2_t a, uint32x2_t b) {
return vorn_u32(a, b);
}
// CHECK-LABEL: define <4 x i32> @test_vornq_u32(<4 x i32> %a, <4 x i32> %b) #0 {
// CHECK-LABEL: define <4 x i32> @test_vornq_u32(<4 x i32> %a, <4 x i32> %b) #1 {
// CHECK: [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK: [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]]
// CHECK: ret <4 x i32> [[OR_I]]
@@ -588,10 +588,13 @@ uint64x1_t test_vorn_u64(uint64x1_t a, uint64x1_t b) {
return vorn_u64(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vornq_u64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK-LABEL: define <2 x i64> @test_vornq_u64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
// CHECK: [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]]
// CHECK: ret <2 x i64> [[OR_I]]
uint64x2_t test_vornq_u64(uint64x2_t a, uint64x2_t b) {
return vornq_u64(a, b);
}
// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"
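
The #0 -> #1 renumbering throughout this test falls out of the new attribute: functions whose widest signature vector is 64 bits (d registers) stay in attribute group #0 with "min-legal-vector-width"="64", while the 128-bit (q register) variants move to group #1 with "128". A reduced sketch of the pattern (function names hypothetical; compiles as C or C++ with NEON available):

#include <arm_neon.h>

// Widest vector in the signature is 64 bits -> "min-legal-vector-width"="64".
int8x8_t and64(int8x8_t a, int8x8_t b) { return vand_s8(a, b); }

// Widest vector is 128 bits -> "min-legal-vector-width"="128".
int8x16_t and128(int8x16_t a, int8x16_t b) { return vandq_s8(a, b); }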


@@ -6,7 +6,7 @@
#include <arm_neon.h>
// CHECK-LABEL: define i16 @test_vaddlv_s8(<8 x i8> %a) #0 {
// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8> %a) #2
// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i8(<8 x i8> %a) #3
// CHECK: [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16
// CHECK: ret i16 [[TMP0]]
int16_t test_vaddlv_s8(int8x8_t a) {
@@ -14,14 +14,14 @@ int16_t test_vaddlv_s8(int8x8_t a) {
}
// CHECK-LABEL: define i32 @test_vaddlv_s16(<4 x i16> %a) #0 {
// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16> %a) #2
// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v4i16(<4 x i16> %a) #3
// CHECK: ret i32 [[VADDLV_I]]
int32_t test_vaddlv_s16(int16x4_t a) {
return vaddlv_s16(a);
}
// CHECK-LABEL: define i16 @test_vaddlv_u8(<8 x i8> %a) #0 {
// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a) #2
// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i8(<8 x i8> %a) #3
// CHECK: [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16
// CHECK: ret i16 [[TMP0]]
uint16_t test_vaddlv_u8(uint8x8_t a) {
@@ -29,58 +29,58 @@ uint16_t test_vaddlv_u8(uint8x8_t a) {
}
// CHECK-LABEL: define i32 @test_vaddlv_u16(<4 x i16> %a) #0 {
// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16> %a) #2
// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v4i16(<4 x i16> %a) #3
// CHECK: ret i32 [[VADDLV_I]]
uint32_t test_vaddlv_u16(uint16x4_t a) {
return vaddlv_u16(a);
}
// CHECK-LABEL: define i16 @test_vaddlvq_s8(<16 x i8> %a) #0 {
// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8> %a) #2
// CHECK-LABEL: define i16 @test_vaddlvq_s8(<16 x i8> %a) #1 {
// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v16i8(<16 x i8> %a) #3
// CHECK: [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16
// CHECK: ret i16 [[TMP0]]
int16_t test_vaddlvq_s8(int8x16_t a) {
return vaddlvq_s8(a);
}
// CHECK-LABEL: define i32 @test_vaddlvq_s16(<8 x i16> %a) #0 {
// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16> %a) #2
// CHECK-LABEL: define i32 @test_vaddlvq_s16(<8 x i16> %a) #1 {
// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.saddlv.i32.v8i16(<8 x i16> %a) #3
// CHECK: ret i32 [[VADDLV_I]]
int32_t test_vaddlvq_s16(int16x8_t a) {
return vaddlvq_s16(a);
}
// CHECK-LABEL: define i64 @test_vaddlvq_s32(<4 x i32> %a) #0 {
// CHECK: [[VADDLVQ_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> %a) #2
// CHECK-LABEL: define i64 @test_vaddlvq_s32(<4 x i32> %a) #1 {
// CHECK: [[VADDLVQ_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> %a) #3
// CHECK: ret i64 [[VADDLVQ_S32_I]]
int64_t test_vaddlvq_s32(int32x4_t a) {
return vaddlvq_s32(a);
}
// CHECK-LABEL: define i16 @test_vaddlvq_u8(<16 x i8> %a) #0 {
// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> %a) #2
// CHECK-LABEL: define i16 @test_vaddlvq_u8(<16 x i8> %a) #1 {
// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v16i8(<16 x i8> %a) #3
// CHECK: [[TMP0:%.*]] = trunc i32 [[VADDLV_I]] to i16
// CHECK: ret i16 [[TMP0]]
uint16_t test_vaddlvq_u8(uint8x16_t a) {
return vaddlvq_u8(a);
}
// CHECK-LABEL: define i32 @test_vaddlvq_u16(<8 x i16> %a) #0 {
// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> %a) #2
// CHECK-LABEL: define i32 @test_vaddlvq_u16(<8 x i16> %a) #1 {
// CHECK: [[VADDLV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddlv.i32.v8i16(<8 x i16> %a) #3
// CHECK: ret i32 [[VADDLV_I]]
uint32_t test_vaddlvq_u16(uint16x8_t a) {
return vaddlvq_u16(a);
}
// CHECK-LABEL: define i64 @test_vaddlvq_u32(<4 x i32> %a) #0 {
// CHECK: [[VADDLVQ_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> %a) #2
// CHECK-LABEL: define i64 @test_vaddlvq_u32(<4 x i32> %a) #1 {
// CHECK: [[VADDLVQ_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> %a) #3
// CHECK: ret i64 [[VADDLVQ_U32_I]]
uint64_t test_vaddlvq_u32(uint32x4_t a) {
return vaddlvq_u32(a);
}
// CHECK-LABEL: define i8 @test_vmaxv_s8(<8 x i8> %a) #0 {
// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %a) #2
// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %a) #3
// CHECK: [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
// CHECK: ret i8 [[TMP0]]
int8_t test_vmaxv_s8(int8x8_t a) {
@@ -88,7 +88,7 @@ int8_t test_vmaxv_s8(int8x8_t a) {
}
// CHECK-LABEL: define i16 @test_vmaxv_s16(<4 x i16> %a) #0 {
// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> %a) #2
// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> %a) #3
// CHECK: [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16
// CHECK: ret i16 [[TMP2]]
int16_t test_vmaxv_s16(int16x4_t a) {
@@ -96,7 +96,7 @@ int16_t test_vmaxv_s16(int16x4_t a) {
}
// CHECK-LABEL: define i8 @test_vmaxv_u8(<8 x i8> %a) #0 {
// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %a) #2
// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i8(<8 x i8> %a) #3
// CHECK: [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
// CHECK: ret i8 [[TMP0]]
uint8_t test_vmaxv_u8(uint8x8_t a) {
@@ -104,61 +104,61 @@ uint8_t test_vmaxv_u8(uint8x8_t a) {
}
// CHECK-LABEL: define i16 @test_vmaxv_u16(<4 x i16> %a) #0 {
// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> %a) #2
// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i16(<4 x i16> %a) #3
// CHECK: [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16
// CHECK: ret i16 [[TMP2]]
uint16_t test_vmaxv_u16(uint16x4_t a) {
return vmaxv_u16(a);
}
// CHECK-LABEL: define i8 @test_vmaxvq_s8(<16 x i8> %a) #0 {
// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> %a) #2
// CHECK-LABEL: define i8 @test_vmaxvq_s8(<16 x i8> %a) #1 {
// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> %a) #3
// CHECK: [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
// CHECK: ret i8 [[TMP0]]
int8_t test_vmaxvq_s8(int8x16_t a) {
return vmaxvq_s8(a);
}
// CHECK-LABEL: define i16 @test_vmaxvq_s16(<8 x i16> %a) #0 {
// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> %a) #2
// CHECK-LABEL: define i16 @test_vmaxvq_s16(<8 x i16> %a) #1 {
// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> %a) #3
// CHECK: [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16
// CHECK: ret i16 [[TMP2]]
int16_t test_vmaxvq_s16(int16x8_t a) {
return vmaxvq_s16(a);
}
// CHECK-LABEL: define i32 @test_vmaxvq_s32(<4 x i32> %a) #0 {
// CHECK: [[VMAXVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> %a) #2
// CHECK-LABEL: define i32 @test_vmaxvq_s32(<4 x i32> %a) #1 {
// CHECK: [[VMAXVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> %a) #3
// CHECK: ret i32 [[VMAXVQ_S32_I]]
int32_t test_vmaxvq_s32(int32x4_t a) {
return vmaxvq_s32(a);
}
// CHECK-LABEL: define i8 @test_vmaxvq_u8(<16 x i8> %a) #0 {
// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %a) #2
// CHECK-LABEL: define i8 @test_vmaxvq_u8(<16 x i8> %a) #1 {
// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v16i8(<16 x i8> %a) #3
// CHECK: [[TMP0:%.*]] = trunc i32 [[VMAXV_I]] to i8
// CHECK: ret i8 [[TMP0]]
uint8_t test_vmaxvq_u8(uint8x16_t a) {
return vmaxvq_u8(a);
}
// CHECK-LABEL: define i16 @test_vmaxvq_u16(<8 x i16> %a) #0 {
// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> %a) #2
// CHECK-LABEL: define i16 @test_vmaxvq_u16(<8 x i16> %a) #1 {
// CHECK: [[VMAXV_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v8i16(<8 x i16> %a) #3
// CHECK: [[TMP2:%.*]] = trunc i32 [[VMAXV_I]] to i16
// CHECK: ret i16 [[TMP2]]
uint16_t test_vmaxvq_u16(uint16x8_t a) {
return vmaxvq_u16(a);
}
// CHECK-LABEL: define i32 @test_vmaxvq_u32(<4 x i32> %a) #0 {
// CHECK: [[VMAXVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> %a) #2
// CHECK-LABEL: define i32 @test_vmaxvq_u32(<4 x i32> %a) #1 {
// CHECK: [[VMAXVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> %a) #3
// CHECK: ret i32 [[VMAXVQ_U32_I]]
uint32_t test_vmaxvq_u32(uint32x4_t a) {
return vmaxvq_u32(a);
}
// CHECK-LABEL: define i8 @test_vminv_s8(<8 x i8> %a) #0 {
// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a) #2
// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a) #3
// CHECK: [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
// CHECK: ret i8 [[TMP0]]
int8_t test_vminv_s8(int8x8_t a) {
@@ -166,7 +166,7 @@ int8_t test_vminv_s8(int8x8_t a) {
}
// CHECK-LABEL: define i16 @test_vminv_s16(<4 x i16> %a) #0 {
// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> %a) #2
// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> %a) #3
// CHECK: [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16
// CHECK: ret i16 [[TMP2]]
int16_t test_vminv_s16(int16x4_t a) {
@@ -174,7 +174,7 @@ int16_t test_vminv_s16(int16x4_t a) {
}
// CHECK-LABEL: define i8 @test_vminv_u8(<8 x i8> %a) #0 {
// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %a) #2
// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i8(<8 x i8> %a) #3
// CHECK: [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
// CHECK: ret i8 [[TMP0]]
uint8_t test_vminv_u8(uint8x8_t a) {
@@ -182,61 +182,61 @@ uint8_t test_vminv_u8(uint8x8_t a) {
}
// CHECK-LABEL: define i16 @test_vminv_u16(<4 x i16> %a) #0 {
// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %a) #2
// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i16(<4 x i16> %a) #3
// CHECK: [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16
// CHECK: ret i16 [[TMP2]]
uint16_t test_vminv_u16(uint16x4_t a) {
return vminv_u16(a);
}
// CHECK-LABEL: define i8 @test_vminvq_s8(<16 x i8> %a) #0 {
// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a) #2
// CHECK-LABEL: define i8 @test_vminvq_s8(<16 x i8> %a) #1 {
// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a) #3
// CHECK: [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
// CHECK: ret i8 [[TMP0]]
int8_t test_vminvq_s8(int8x16_t a) {
return vminvq_s8(a);
}
// CHECK-LABEL: define i16 @test_vminvq_s16(<8 x i16> %a) #0 {
// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %a) #2
// CHECK-LABEL: define i16 @test_vminvq_s16(<8 x i16> %a) #1 {
// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %a) #3
// CHECK: [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16
// CHECK: ret i16 [[TMP2]]
int16_t test_vminvq_s16(int16x8_t a) {
return vminvq_s16(a);
}
// CHECK-LABEL: define i32 @test_vminvq_s32(<4 x i32> %a) #0 {
// CHECK: [[VMINVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> %a) #2
// CHECK-LABEL: define i32 @test_vminvq_s32(<4 x i32> %a) #1 {
// CHECK: [[VMINVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> %a) #3
// CHECK: ret i32 [[VMINVQ_S32_I]]
int32_t test_vminvq_s32(int32x4_t a) {
return vminvq_s32(a);
}
// CHECK-LABEL: define i8 @test_vminvq_u8(<16 x i8> %a) #0 {
// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %a) #2
// CHECK-LABEL: define i8 @test_vminvq_u8(<16 x i8> %a) #1 {
// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v16i8(<16 x i8> %a) #3
// CHECK: [[TMP0:%.*]] = trunc i32 [[VMINV_I]] to i8
// CHECK: ret i8 [[TMP0]]
uint8_t test_vminvq_u8(uint8x16_t a) {
return vminvq_u8(a);
}
// CHECK-LABEL: define i16 @test_vminvq_u16(<8 x i16> %a) #0 {
// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> %a) #2
// CHECK-LABEL: define i16 @test_vminvq_u16(<8 x i16> %a) #1 {
// CHECK: [[VMINV_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v8i16(<8 x i16> %a) #3
// CHECK: [[TMP2:%.*]] = trunc i32 [[VMINV_I]] to i16
// CHECK: ret i16 [[TMP2]]
uint16_t test_vminvq_u16(uint16x8_t a) {
return vminvq_u16(a);
}
// CHECK-LABEL: define i32 @test_vminvq_u32(<4 x i32> %a) #0 {
// CHECK: [[VMINVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> %a) #2
// CHECK-LABEL: define i32 @test_vminvq_u32(<4 x i32> %a) #1 {
// CHECK: [[VMINVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v4i32(<4 x i32> %a) #3
// CHECK: ret i32 [[VMINVQ_U32_I]]
uint32_t test_vminvq_u32(uint32x4_t a) {
return vminvq_u32(a);
}
// CHECK-LABEL: define i8 @test_vaddv_s8(<8 x i8> %a) #0 {
// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a) #2
// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %a) #3
// CHECK: [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
// CHECK: ret i8 [[TMP0]]
int8_t test_vaddv_s8(int8x8_t a) {
@@ -244,7 +244,7 @@ int8_t test_vaddv_s8(int8x8_t a) {
}
// CHECK-LABEL: define i16 @test_vaddv_s16(<4 x i16> %a) #0 {
// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a) #2
// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i16(<4 x i16> %a) #3
// CHECK: [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16
// CHECK: ret i16 [[TMP2]]
int16_t test_vaddv_s16(int16x4_t a) {
@@ -252,7 +252,7 @@ int16_t test_vaddv_s16(int16x4_t a) {
}
// CHECK-LABEL: define i8 @test_vaddv_u8(<8 x i8> %a) #0 {
// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %a) #2
// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i8(<8 x i8> %a) #3
// CHECK: [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
// CHECK: ret i8 [[TMP0]]
uint8_t test_vaddv_u8(uint8x8_t a) {
@@ -260,83 +260,86 @@ uint8_t test_vaddv_u8(uint8x8_t a) {
}
// CHECK-LABEL: define i16 @test_vaddv_u16(<4 x i16> %a) #0 {
// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> %a) #2
// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i16(<4 x i16> %a) #3
// CHECK: [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16
// CHECK: ret i16 [[TMP2]]
uint16_t test_vaddv_u16(uint16x4_t a) {
return vaddv_u16(a);
}
// CHECK-LABEL: define i8 @test_vaddvq_s8(<16 x i8> %a) #0 {
// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a) #2
// CHECK-LABEL: define i8 @test_vaddvq_s8(<16 x i8> %a) #1 {
// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v16i8(<16 x i8> %a) #3
// CHECK: [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
// CHECK: ret i8 [[TMP0]]
int8_t test_vaddvq_s8(int8x16_t a) {
return vaddvq_s8(a);
}
// CHECK-LABEL: define i16 @test_vaddvq_s16(<8 x i16> %a) #0 {
// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a) #2
// CHECK-LABEL: define i16 @test_vaddvq_s16(<8 x i16> %a) #1 {
// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v8i16(<8 x i16> %a) #3
// CHECK: [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16
// CHECK: ret i16 [[TMP2]]
int16_t test_vaddvq_s16(int16x8_t a) {
return vaddvq_s16(a);
}
// CHECK-LABEL: define i32 @test_vaddvq_s32(<4 x i32> %a) #0 {
// CHECK: [[VADDVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a) #2
// CHECK-LABEL: define i32 @test_vaddvq_s32(<4 x i32> %a) #1 {
// CHECK: [[VADDVQ_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v4i32(<4 x i32> %a) #3
// CHECK: ret i32 [[VADDVQ_S32_I]]
int32_t test_vaddvq_s32(int32x4_t a) {
return vaddvq_s32(a);
}
// CHECK-LABEL: define i8 @test_vaddvq_u8(<16 x i8> %a) #0 {
// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8> %a) #2
// CHECK-LABEL: define i8 @test_vaddvq_u8(<16 x i8> %a) #1 {
// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v16i8(<16 x i8> %a) #3
// CHECK: [[TMP0:%.*]] = trunc i32 [[VADDV_I]] to i8
// CHECK: ret i8 [[TMP0]]
uint8_t test_vaddvq_u8(uint8x16_t a) {
return vaddvq_u8(a);
}
// CHECK-LABEL: define i16 @test_vaddvq_u16(<8 x i16> %a) #0 {
// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i16(<8 x i16> %a) #2
// CHECK-LABEL: define i16 @test_vaddvq_u16(<8 x i16> %a) #1 {
// CHECK: [[VADDV_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v8i16(<8 x i16> %a) #3
// CHECK: [[TMP2:%.*]] = trunc i32 [[VADDV_I]] to i16
// CHECK: ret i16 [[TMP2]]
uint16_t test_vaddvq_u16(uint16x8_t a) {
return vaddvq_u16(a);
}
// CHECK-LABEL: define i32 @test_vaddvq_u32(<4 x i32> %a) #0 {
// CHECK: [[VADDVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32> %a) #2
// CHECK-LABEL: define i32 @test_vaddvq_u32(<4 x i32> %a) #1 {
// CHECK: [[VADDVQ_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32> %a) #3
// CHECK: ret i32 [[VADDVQ_U32_I]]
uint32_t test_vaddvq_u32(uint32x4_t a) {
return vaddvq_u32(a);
}
// CHECK-LABEL: define float @test_vmaxvq_f32(<4 x float> %a) #0 {
// CHECK: [[VMAXVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float> %a) #2
// CHECK-LABEL: define float @test_vmaxvq_f32(<4 x float> %a) #1 {
// CHECK: [[VMAXVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v4f32(<4 x float> %a) #3
// CHECK: ret float [[VMAXVQ_F32_I]]
float32_t test_vmaxvq_f32(float32x4_t a) {
return vmaxvq_f32(a);
}
// CHECK-LABEL: define float @test_vminvq_f32(<4 x float> %a) #0 {
// CHECK: [[VMINVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> %a) #2
// CHECK-LABEL: define float @test_vminvq_f32(<4 x float> %a) #1 {
// CHECK: [[VMINVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v4f32(<4 x float> %a) #3
// CHECK: ret float [[VMINVQ_F32_I]]
float32_t test_vminvq_f32(float32x4_t a) {
return vminvq_f32(a);
}
// CHECK-LABEL: define float @test_vmaxnmvq_f32(<4 x float> %a) #0 {
// CHECK: [[VMAXNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> %a) #2
// CHECK-LABEL: define float @test_vmaxnmvq_f32(<4 x float> %a) #1 {
// CHECK: [[VMAXNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v4f32(<4 x float> %a) #3
// CHECK: ret float [[VMAXNMVQ_F32_I]]
float32_t test_vmaxnmvq_f32(float32x4_t a) {
return vmaxnmvq_f32(a);
}
// CHECK-LABEL: define float @test_vminnmvq_f32(<4 x float> %a) #0 {
// CHECK: [[VMINNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> %a) #2
// CHECK-LABEL: define float @test_vminnmvq_f32(<4 x float> %a) #1 {
// CHECK: [[VMINNMVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float> %a) #3
// CHECK: ret float [[VMINNMVQ_F32_I]]
float32_t test_vminnmvq_f32(float32x4_t a) {
return vminnmvq_f32(a);
}
// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"


@@ -14,7 +14,7 @@ float32x2_t test_vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
return vmla_n_f32(a, b, c);
}
// CHECK-LABEL: define <4 x float> @test_vmlaq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #0 {
// CHECK-LABEL: define <4 x float> @test_vmlaq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #1 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
@@ -26,7 +26,7 @@ float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
return vmlaq_n_f32(a, b, c);
}
// CHECK-LABEL: define <2 x double> @test_vmlaq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
// CHECK-LABEL: define <2 x double> @test_vmlaq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #1 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
// CHECK: [[MUL_I:%.*]] = fmul <2 x double> %b, [[VECINIT1_I]]
@@ -36,7 +36,7 @@ float64x2_t test_vmlaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
return vmlaq_n_f64(a, b, c);
}
// CHECK-LABEL: define <4 x float> @test_vmlsq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #0 {
// CHECK-LABEL: define <4 x float> @test_vmlsq_n_f32(<4 x float> %a, <4 x float> %b, float %c) #1 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
@@ -58,7 +58,7 @@ float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
return vmls_n_f32(a, b, c);
}
// CHECK-LABEL: define <2 x double> @test_vmlsq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
// CHECK-LABEL: define <2 x double> @test_vmlsq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #1 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
// CHECK: [[MUL_I:%.*]] = fmul <2 x double> %b, [[VECINIT1_I]]
@@ -77,7 +77,7 @@ float32x2_t test_vmla_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
return vmla_lane_f32(a, b, v, 0);
}
// CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
// CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
@@ -86,7 +86,7 @@ float32x4_t test_vmlaq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
return vmlaq_lane_f32(a, b, v, 0);
}
// CHECK-LABEL: define <2 x float> @test_vmla_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
// CHECK-LABEL: define <2 x float> @test_vmla_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
@@ -95,7 +95,7 @@ float32x2_t test_vmla_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
return vmla_laneq_f32(a, b, v, 0);
}
// CHECK-LABEL: define <4 x float> @test_vmlaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
// CHECK-LABEL: define <4 x float> @test_vmlaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
@@ -113,7 +113,7 @@ float32x2_t test_vmls_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) {
return vmls_lane_f32(a, b, v, 0);
}
// CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
// CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
@@ -122,7 +122,7 @@ float32x4_t test_vmlsq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) {
return vmlsq_lane_f32(a, b, v, 0);
}
// CHECK-LABEL: define <2 x float> @test_vmls_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
// CHECK-LABEL: define <2 x float> @test_vmls_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
@@ -131,7 +131,7 @@ float32x2_t test_vmls_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) {
return vmls_laneq_f32(a, b, v, 0);
}
// CHECK-LABEL: define <4 x float> @test_vmlsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
// CHECK-LABEL: define <4 x float> @test_vmlsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> zeroinitializer
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
@@ -149,7 +149,7 @@ float32x2_t test_vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
return vmla_lane_f32(a, b, v, 1);
}
// CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
// CHECK-LABEL: define <4 x float> @test_vmlaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
@@ -158,7 +158,7 @@ float32x4_t test_vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
return vmlaq_lane_f32(a, b, v, 1);
}
// CHECK-LABEL: define <2 x float> @test_vmla_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
// CHECK-LABEL: define <2 x float> @test_vmla_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> <i32 3, i32 3>
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = fadd <2 x float> %a, [[MUL]]
@@ -167,7 +167,7 @@ float32x2_t test_vmla_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
return vmla_laneq_f32(a, b, v, 3);
}
// CHECK-LABEL: define <4 x float> @test_vmlaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
// CHECK-LABEL: define <4 x float> @test_vmlaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK: [[ADD:%.*]] = fadd <4 x float> %a, [[MUL]]
@@ -185,7 +185,7 @@ float32x2_t test_vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
return vmls_lane_f32(a, b, v, 1);
}
// CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 {
// CHECK-LABEL: define <4 x float> @test_vmlsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x float> %v, <2 x float> %v, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
@@ -193,7 +193,7 @@ float32x2_t test_vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) {
float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) {
return vmlsq_lane_f32(a, b, v, 1);
}
// CHECK-LABEL: define <2 x float> @test_vmls_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 {
// CHECK-LABEL: define <2 x float> @test_vmls_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <2 x i32> <i32 3, i32 3>
// CHECK: [[MUL:%.*]] = fmul <2 x float> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = fsub <2 x float> %a, [[MUL]]
@@ -202,7 +202,7 @@ float32x2_t test_vmls_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) {
return vmls_laneq_f32(a, b, v, 3);
}
// CHECK-LABEL: define <4 x float> @test_vmlsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 {
// CHECK-LABEL: define <4 x float> @test_vmlsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <4 x float> %v, <4 x float> %v, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK: [[MUL:%.*]] = fmul <4 x float> %b, [[SHUFFLE]]
// CHECK: [[SUB:%.*]] = fsub <4 x float> %a, [[MUL]]
@@ -211,7 +211,7 @@ float32x4_t test_vmlsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) {
return vmlsq_laneq_f32(a, b, v, 3);
}
// CHECK-LABEL: define <2 x double> @test_vfmaq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
// CHECK-LABEL: define <2 x double> @test_vfmaq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #1 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
// CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> [[VECINIT1_I]], <2 x double> %a)
@@ -220,12 +220,15 @@ float64x2_t test_vfmaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
return vfmaq_n_f64(a, b, c);
}
// CHECK-LABEL: define <2 x double> @test_vfmsq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 {
// CHECK-LABEL: define <2 x double> @test_vfmsq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #1 {
// CHECK: [[SUB_I:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1
// CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[SUB_I]], <2 x double> [[VECINIT1_I]], <2 x double> %a) #2
// CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[SUB_I]], <2 x double> [[VECINIT1_I]], <2 x double> %a) #3
// CHECK: ret <2 x double> [[TMP6]]
float64x2_t test_vfmsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) {
return vfmsq_n_f64(a, b, c);
}
// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"

File diff suppressed because it is too large.


@@ -23,7 +23,7 @@ float64_t test_vdupd_lane_f64(float64x1_t a) {
}
// CHECK-LABEL: define float @test_vdups_laneq_f32(<4 x float> %a) #0 {
// CHECK-LABEL: define float @test_vdups_laneq_f32(<4 x float> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
@@ -33,7 +33,7 @@ float32_t test_vdups_laneq_f32(float32x4_t a) {
}
// CHECK-LABEL: define double @test_vdupd_laneq_f64(<2 x double> %a) #0 {
// CHECK-LABEL: define double @test_vdupd_laneq_f64(<2 x double> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
@@ -118,7 +118,7 @@ uint64_t test_vdupd_lane_u64(uint64x1_t a) {
return vdupd_lane_u64(a, 0);
}
// CHECK-LABEL: define i8 @test_vdupb_laneq_s8(<16 x i8> %a) #0 {
// CHECK-LABEL: define i8 @test_vdupb_laneq_s8(<16 x i8> %a) #1 {
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK: ret i8 [[VGETQ_LANE]]
int8_t test_vdupb_laneq_s8(int8x16_t a) {
@@ -126,7 +126,7 @@ int8_t test_vdupb_laneq_s8(int8x16_t a) {
}
// CHECK-LABEL: define i16 @test_vduph_laneq_s16(<8 x i16> %a) #0 {
// CHECK-LABEL: define i16 @test_vduph_laneq_s16(<8 x i16> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -136,7 +136,7 @@ int16_t test_vduph_laneq_s16(int16x8_t a) {
}
// CHECK-LABEL: define i32 @test_vdups_laneq_s32(<4 x i32> %a) #0 {
// CHECK-LABEL: define i32 @test_vdups_laneq_s32(<4 x i32> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@@ -146,7 +146,7 @@ int32_t test_vdups_laneq_s32(int32x4_t a) {
}
// CHECK-LABEL: define i64 @test_vdupd_laneq_s64(<2 x i64> %a) #0 {
// CHECK-LABEL: define i64 @test_vdupd_laneq_s64(<2 x i64> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
@@ -156,7 +156,7 @@ int64_t test_vdupd_laneq_s64(int64x2_t a) {
}
// CHECK-LABEL: define i8 @test_vdupb_laneq_u8(<16 x i8> %a) #0 {
// CHECK-LABEL: define i8 @test_vdupb_laneq_u8(<16 x i8> %a) #1 {
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK: ret i8 [[VGETQ_LANE]]
uint8_t test_vdupb_laneq_u8(uint8x16_t a) {
@@ -164,7 +164,7 @@ uint8_t test_vdupb_laneq_u8(uint8x16_t a) {
}
// CHECK-LABEL: define i16 @test_vduph_laneq_u16(<8 x i16> %a) #0 {
// CHECK-LABEL: define i16 @test_vduph_laneq_u16(<8 x i16> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -174,7 +174,7 @@ uint16_t test_vduph_laneq_u16(uint16x8_t a) {
}
// CHECK-LABEL: define i32 @test_vdups_laneq_u32(<4 x i32> %a) #0 {
// CHECK-LABEL: define i32 @test_vdups_laneq_u32(<4 x i32> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@@ -184,7 +184,7 @@ uint32_t test_vdups_laneq_u32(uint32x4_t a) {
}
// CHECK-LABEL: define i64 @test_vdupd_laneq_u64(<2 x i64> %a) #0 {
// CHECK-LABEL: define i64 @test_vdupd_laneq_u64(<2 x i64> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
@@ -209,14 +209,14 @@ poly16_t test_vduph_lane_p16(poly16x4_t a) {
return vduph_lane_p16(a, 3);
}
// CHECK-LABEL: define i8 @test_vdupb_laneq_p8(<16 x i8> %a) #0 {
// CHECK-LABEL: define i8 @test_vdupb_laneq_p8(<16 x i8> %a) #1 {
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK: ret i8 [[VGETQ_LANE]]
poly8_t test_vdupb_laneq_p8(poly8x16_t a) {
return vdupb_laneq_p8(a, 15);
}
// CHECK-LABEL: define i16 @test_vduph_laneq_p16(<8 x i16> %a) #0 {
// CHECK-LABEL: define i16 @test_vduph_laneq_p16(<8 x i16> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@@ -225,3 +225,5 @@ poly16_t test_vduph_laneq_p16(poly16x8_t a) {
return vduph_laneq_p16(a, 7);
}
// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"


@@ -26,7 +26,7 @@ float64_t test_vmuld_lane_f64(float64_t a, float64x1_t b) {
return vmuld_lane_f64(a, b, 0);
}
// CHECK-LABEL: define float @test_vmuls_laneq_f32(float %a, <4 x float> %b) #0 {
// CHECK-LABEL: define float @test_vmuls_laneq_f32(float %a, <4 x float> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
@@ -36,7 +36,7 @@ float32_t test_vmuls_laneq_f32(float32_t a, float32x4_t b) {
return vmuls_laneq_f32(a, b, 3);
}
// CHECK-LABEL: define double @test_vmuld_laneq_f64(double %a, <2 x double> %b) #0 {
// CHECK-LABEL: define double @test_vmuld_laneq_f64(double %a, <2 x double> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
@@ -65,7 +65,7 @@ float32_t test_vmulxs_lane_f32(float32_t a, float32x2_t b) {
return vmulxs_lane_f32(a, b, 1);
}
// CHECK-LABEL: define float @test_vmulxs_laneq_f32(float %a, <4 x float> %b) #0 {
// CHECK-LABEL: define float @test_vmulxs_laneq_f32(float %a, <4 x float> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
@@ -85,7 +85,7 @@ float64_t test_vmulxd_lane_f64(float64_t a, float64x1_t b) {
return vmulxd_lane_f64(a, b, 0);
}
// CHECK-LABEL: define double @test_vmulxd_laneq_f64(double %a, <2 x double> %b) #0 {
// CHECK-LABEL: define double @test_vmulxd_laneq_f64(double %a, <2 x double> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
@@ -112,7 +112,7 @@ float64x1_t test_vmulx_lane_f64(float64x1_t a, float64x1_t b) {
}
// CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_0(<1 x double> %a, <2 x double> %b) #0 {
// CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_0(<1 x double> %a, <2 x double> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
@@ -128,7 +128,7 @@ float64x1_t test_vmulx_laneq_f64_0(float64x1_t a, float64x2_t b) {
return vmulx_laneq_f64(a, b, 0);
}
// CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_1(<1 x double> %a, <2 x double> %b) #0 {
// CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_1(<1 x double> %a, <2 x double> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32 0
@ -165,7 +165,7 @@ float64_t test_vfmad_lane_f64(float64_t a, float64_t b, float64x1_t c) {
return vfmad_lane_f64(a, b, c, 0);
}
// CHECK-LABEL: define double @test_vfmad_laneq_f64(double %a, double %b, <2 x double> %c) #0 {
// CHECK-LABEL: define double @test_vfmad_laneq_f64(double %a, double %b, <2 x double> %c) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %c to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
@ -215,7 +215,7 @@ float64x1_t test_vfms_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) {
return vfms_lane_f64(a, b, v, 0);
}
// CHECK-LABEL: define <1 x double> @test_vfma_laneq_f64(<1 x double> %a, <1 x double> %b, <2 x double> %v) #0 {
// CHECK-LABEL: define <1 x double> @test_vfma_laneq_f64(<1 x double> %a, <1 x double> %b, <2 x double> %v) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8>
@ -230,7 +230,7 @@ float64x1_t test_vfma_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) {
return vfma_laneq_f64(a, b, v, 0);
}
// CHECK-LABEL: define <1 x double> @test_vfms_laneq_f64(<1 x double> %a, <1 x double> %b, <2 x double> %v) #0 {
// CHECK-LABEL: define <1 x double> @test_vfms_laneq_f64(<1 x double> %a, <1 x double> %b, <2 x double> %v) #1 {
// CHECK: [[SUB:%.*]] = fsub <1 x double> <double -0.000000e+00>, %b
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x double> [[SUB]] to <8 x i8>
@ -269,7 +269,7 @@ int64_t test_vqdmulls_lane_s32(int32_t a, int32x2_t b) {
return vqdmulls_lane_s32(a, b, 1);
}
// CHECK-LABEL: define i32 @test_vqdmullh_laneq_s16(i16 %a, <8 x i16> %b) #0 {
// CHECK-LABEL: define i32 @test_vqdmullh_laneq_s16(i16 %a, <8 x i16> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@ -282,7 +282,7 @@ int32_t test_vqdmullh_laneq_s16(int16_t a, int16x8_t b) {
return vqdmullh_laneq_s16(a, b, 7);
}
// CHECK-LABEL: define i64 @test_vqdmulls_laneq_s32(i32 %a, <4 x i32> %b) #0 {
// CHECK-LABEL: define i64 @test_vqdmulls_laneq_s32(i32 %a, <4 x i32> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@ -316,7 +316,7 @@ int32_t test_vqdmulhs_lane_s32(int32_t a, int32x2_t b) {
}
// CHECK-LABEL: define i16 @test_vqdmulhh_laneq_s16(i16 %a, <8 x i16> %b) #0 {
// CHECK-LABEL: define i16 @test_vqdmulhh_laneq_s16(i16 %a, <8 x i16> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@ -330,7 +330,7 @@ int16_t test_vqdmulhh_laneq_s16(int16_t a, int16x8_t b) {
}
// CHECK-LABEL: define i32 @test_vqdmulhs_laneq_s32(i32 %a, <4 x i32> %b) #0 {
// CHECK-LABEL: define i32 @test_vqdmulhs_laneq_s32(i32 %a, <4 x i32> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@ -364,7 +364,7 @@ int32_t test_vqrdmulhs_lane_s32(int32_t a, int32x2_t b) {
}
// CHECK-LABEL: define i16 @test_vqrdmulhh_laneq_s16(i16 %a, <8 x i16> %b) #0 {
// CHECK-LABEL: define i16 @test_vqrdmulhh_laneq_s16(i16 %a, <8 x i16> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@ -378,7 +378,7 @@ int16_t test_vqrdmulhh_laneq_s16(int16_t a, int16x8_t b) {
}
// CHECK-LABEL: define i32 @test_vqrdmulhs_laneq_s32(i32 %a, <4 x i32> %b) #0 {
// CHECK-LABEL: define i32 @test_vqrdmulhs_laneq_s32(i32 %a, <4 x i32> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@ -413,7 +413,7 @@ int64_t test_vqdmlals_lane_s32(int64_t a, int32_t b, int32x2_t c) {
return vqdmlals_lane_s32(a, b, c, 1);
}
// CHECK-LABEL: define i32 @test_vqdmlalh_laneq_s16(i32 %a, i16 %b, <8 x i16> %c) #0 {
// CHECK-LABEL: define i32 @test_vqdmlalh_laneq_s16(i32 %a, i16 %b, <8 x i16> %c) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %c to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@ -427,7 +427,7 @@ int32_t test_vqdmlalh_laneq_s16(int32_t a, int16_t b, int16x8_t c) {
return vqdmlalh_laneq_s16(a, b, c, 7);
}
// CHECK-LABEL: define i64 @test_vqdmlals_laneq_s32(i64 %a, i32 %b, <4 x i32> %c) #0 {
// CHECK-LABEL: define i64 @test_vqdmlals_laneq_s32(i64 %a, i32 %b, <4 x i32> %c) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %c to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@ -463,7 +463,7 @@ int64_t test_vqdmlsls_lane_s32(int64_t a, int32_t b, int32x2_t c) {
return vqdmlsls_lane_s32(a, b, c, 1);
}
// CHECK-LABEL: define i32 @test_vqdmlslh_laneq_s16(i32 %a, i16 %b, <8 x i16> %c) #0 {
// CHECK-LABEL: define i32 @test_vqdmlslh_laneq_s16(i32 %a, i16 %b, <8 x i16> %c) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %c to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@ -477,7 +477,7 @@ int32_t test_vqdmlslh_laneq_s16(int32_t a, int16_t b, int16x8_t c) {
return vqdmlslh_laneq_s16(a, b, c, 7);
}
// CHECK-LABEL: define i64 @test_vqdmlsls_laneq_s32(i64 %a, i32 %b, <4 x i32> %c) #0 {
// CHECK-LABEL: define i64 @test_vqdmlsls_laneq_s32(i64 %a, i32 %b, <4 x i32> %c) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %c to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@ -513,7 +513,7 @@ float64x1_t test_vmulx_lane_f64_0() {
return result;
}
// CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_2() #0 {
// CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_2() #1 {
// CHECK: [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double>
// CHECK: [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double>
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x double> [[TMP0]], <1 x double> [[TMP1]], <2 x i32> <i32 0, i32 1>
@ -540,3 +540,6 @@ float64x1_t test_vmulx_laneq_f64_2() {
result = vmulx_laneq_f64(arg1, arg3, 1);
return result;
}
// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"

View File

@ -7,14 +7,14 @@
// CHECK-LABEL: define <8 x i8> @test_vtbl1_s8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2
// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #3
// CHECK: ret <8 x i8> [[VTBL11_I]]
int8x8_t test_vtbl1_s8(int8x8_t a, int8x8_t b) {
return vtbl1_s8(a, b);
}
// CHECK-LABEL: define <8 x i8> @test_vqtbl1_s8(<16 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b) #2
// CHECK-LABEL: define <8 x i8> @test_vqtbl1_s8(<16 x i8> %a, <8 x i8> %b) #1 {
// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b) #3
// CHECK: ret <8 x i8> [[VTBL1_I]]
int8x8_t test_vqtbl1_s8(int8x16_t a, int8x8_t b) {
return vqtbl1_s8(a, b);
@ -36,7 +36,7 @@ int8x8_t test_vqtbl1_s8(int8x16_t a, int8x8_t b) {
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2
// CHECK: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #3
// CHECK: ret <8 x i8> [[VTBL13_I]]
int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) {
return vtbl2_s8(a, b);
@ -57,7 +57,7 @@ int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) {
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #2
// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #3
// CHECK: ret <8 x i8> [[VTBL2_I]]
int8x8_t test_vqtbl2_s8(int8x16x2_t a, int8x8_t b) {
return vqtbl2_s8(a, b);
@ -83,7 +83,7 @@ int8x8_t test_vqtbl2_s8(int8x16x2_t a, int8x8_t b) {
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #2
// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #3
// CHECK: ret <8 x i8> [[VTBL26_I]]
int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) {
return vtbl3_s8(a, b);
@ -107,7 +107,7 @@ int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) {
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #2
// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #3
// CHECK: ret <8 x i8> [[VTBL3_I]]
int8x8_t test_vqtbl3_s8(int8x16x3_t a, int8x8_t b) {
return vqtbl3_s8(a, b);
@ -136,7 +136,7 @@ int8x8_t test_vqtbl3_s8(int8x16x3_t a, int8x8_t b) {
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
// CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #2
// CHECK: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #3
// CHECK: ret <8 x i8> [[VTBL28_I]]
int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) {
return vtbl4_s8(a, b);
@ -163,20 +163,20 @@ int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) {
// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #2
// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #3
// CHECK: ret <8 x i8> [[VTBL4_I]]
int8x8_t test_vqtbl4_s8(int8x16x4_t a, int8x8_t b) {
return vqtbl4_s8(a, b);
}
// CHECK-LABEL: define <16 x i8> @test_vqtbl1q_s8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> %b) #2
// CHECK-LABEL: define <16 x i8> @test_vqtbl1q_s8(<16 x i8> %a, <16 x i8> %b) #1 {
// CHECK: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> %b) #3
// CHECK: ret <16 x i8> [[VTBL1_I]]
int8x16_t test_vqtbl1q_s8(int8x16_t a, int8x16_t b) {
return vqtbl1q_s8(a, b);
}
// CHECK-LABEL: define <16 x i8> @test_vqtbl2q_s8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
// CHECK-LABEL: define <16 x i8> @test_vqtbl2q_s8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
// CHECK: [[__P0_I:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[A:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[A]], i32 0, i32 0
@ -191,13 +191,13 @@ int8x16_t test_vqtbl1q_s8(int8x16_t a, int8x16_t b) {
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #2
// CHECK: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #3
// CHECK: ret <16 x i8> [[VTBL2_I]]
int8x16_t test_vqtbl2q_s8(int8x16x2_t a, int8x16_t b) {
return vqtbl2q_s8(a, b);
}
// CHECK-LABEL: define <16 x i8> @test_vqtbl3q_s8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
// CHECK-LABEL: define <16 x i8> @test_vqtbl3q_s8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
// CHECK: [[__P0_I:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK: [[A:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[A]], i32 0, i32 0
@ -215,13 +215,13 @@ int8x16_t test_vqtbl2q_s8(int8x16x2_t a, int8x16_t b) {
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #2
// CHECK: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #3
// CHECK: ret <16 x i8> [[VTBL3_I]]
int8x16_t test_vqtbl3q_s8(int8x16x3_t a, int8x16_t b) {
return vqtbl3q_s8(a, b);
}
// CHECK-LABEL: define <16 x i8> @test_vqtbl4q_s8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
// CHECK-LABEL: define <16 x i8> @test_vqtbl4q_s8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
// CHECK: [[__P0_I:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK: [[A:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[A]], i32 0, i32 0
@ -242,7 +242,7 @@ int8x16_t test_vqtbl3q_s8(int8x16x3_t a, int8x16_t b) {
// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
// CHECK: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #2
// CHECK: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #3
// CHECK: ret <16 x i8> [[VTBL4_I]]
int8x16_t test_vqtbl4q_s8(int8x16x4_t a, int8x16_t b) {
return vqtbl4q_s8(a, b);
@ -250,7 +250,7 @@ int8x16_t test_vqtbl4q_s8(int8x16x4_t a, int8x16_t b) {
// CHECK-LABEL: define <8 x i8> @test_vtbx1_s8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %c) #2
// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %c) #3
// CHECK: [[TMP0:%.*]] = icmp uge <8 x i8> %c, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
// CHECK: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], %a
@ -278,7 +278,7 @@ int8x8_t test_vtbx1_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> [[VTBX1_I]], <8 x i8> %c) #2
// CHECK: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> [[VTBX1_I]], <8 x i8> %c) #3
// CHECK: ret <8 x i8> [[VTBX13_I]]
int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) {
return vtbx2_s8(a, b, c);
@ -304,7 +304,7 @@ int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) {
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %c) #2
// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %c) #3
// CHECK: [[TMP4:%.*]] = icmp uge <8 x i8> %c, <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
// CHECK: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8>
// CHECK: [[TMP6:%.*]] = and <8 x i8> [[TMP5]], %a
@ -339,14 +339,14 @@ int8x8_t test_vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c) {
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
// CHECK: [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> %c) #2
// CHECK: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> %c) #3
// CHECK: ret <8 x i8> [[VTBX28_I]]
int8x8_t test_vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c) {
return vtbx4_s8(a, b, c);
}
// CHECK-LABEL: define <8 x i8> @test_vqtbx1_s8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #2
// CHECK-LABEL: define <8 x i8> @test_vqtbx1_s8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #1 {
// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #3
// CHECK: ret <8 x i8> [[VTBX1_I]]
int8x8_t test_vqtbx1_s8(int8x8_t a, int8x16_t b, int8x8_t c) {
return vqtbx1_s8(a, b, c);
@ -367,7 +367,7 @@ int8x8_t test_vqtbx1_s8(int8x8_t a, int8x16_t b, int8x8_t c) {
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %c) #2
// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %c) #3
// CHECK: ret <8 x i8> [[VTBX2_I]]
int8x8_t test_vqtbx2_s8(int8x8_t a, int8x16x2_t b, int8x8_t c) {
return vqtbx2_s8(a, b, c);
@ -391,7 +391,7 @@ int8x8_t test_vqtbx2_s8(int8x8_t a, int8x16x2_t b, int8x8_t c) {
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %c) #2
// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %c) #3
// CHECK: ret <8 x i8> [[VTBX3_I]]
int8x8_t test_vqtbx3_s8(int8x8_t a, int8x16x3_t b, int8x8_t c) {
return vqtbx3_s8(a, b, c);
@ -418,20 +418,20 @@ int8x8_t test_vqtbx3_s8(int8x8_t a, int8x16x3_t b, int8x8_t c) {
// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %c) #2
// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %c) #3
// CHECK: ret <8 x i8> [[VTBX4_I]]
int8x8_t test_vqtbx4_s8(int8x8_t a, int8x16x4_t b, int8x8_t c) {
return vqtbx4_s8(a, b, c);
}
// CHECK-LABEL: define <16 x i8> @test_vqtbx1q_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #2
// CHECK-LABEL: define <16 x i8> @test_vqtbx1q_s8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #1 {
// CHECK: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #3
// CHECK: ret <16 x i8> [[VTBX1_I]]
int8x16_t test_vqtbx1q_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
return vqtbx1q_s8(a, b, c);
}
// CHECK-LABEL: define <16 x i8> @test_vqtbx2q_s8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
// CHECK-LABEL: define <16 x i8> @test_vqtbx2q_s8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
// CHECK: [[__P1_I:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0
@ -446,13 +446,13 @@ int8x16_t test_vqtbx1q_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %c) #2
// CHECK: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %c) #3
// CHECK: ret <16 x i8> [[VTBX2_I]]
int8x16_t test_vqtbx2q_s8(int8x16_t a, int8x16x2_t b, int8x16_t c) {
return vqtbx2q_s8(a, b, c);
}
// CHECK-LABEL: define <16 x i8> @test_vqtbx3q_s8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
// CHECK-LABEL: define <16 x i8> @test_vqtbx3q_s8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
// CHECK: [[__P1_I:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
@ -470,13 +470,13 @@ int8x16_t test_vqtbx2q_s8(int8x16_t a, int8x16x2_t b, int8x16_t c) {
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %c) #2
// CHECK: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %c) #3
// CHECK: ret <16 x i8> [[VTBX3_I]]
int8x16_t test_vqtbx3q_s8(int8x16_t a, int8x16x3_t b, int8x16_t c) {
return vqtbx3q_s8(a, b, c);
}
// CHECK-LABEL: define <16 x i8> @test_vqtbx4q_s8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
// CHECK-LABEL: define <16 x i8> @test_vqtbx4q_s8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
// CHECK: [[__P1_I:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
@ -497,7 +497,7 @@ int8x16_t test_vqtbx3q_s8(int8x16_t a, int8x16x3_t b, int8x16_t c) {
// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
// CHECK: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %c) #2
// CHECK: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %c) #3
// CHECK: ret <16 x i8> [[VTBX4_I]]
int8x16_t test_vqtbx4q_s8(int8x16_t a, int8x16x4_t b, int8x16_t c) {
return vqtbx4q_s8(a, b, c);
@ -505,14 +505,14 @@ int8x16_t test_vqtbx4q_s8(int8x16_t a, int8x16x4_t b, int8x16_t c) {
// CHECK-LABEL: define <8 x i8> @test_vtbl1_u8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2
// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #3
// CHECK: ret <8 x i8> [[VTBL11_I]]
uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) {
return vtbl1_u8(a, b);
}
// CHECK-LABEL: define <8 x i8> @test_vqtbl1_u8(<16 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b) #2
// CHECK-LABEL: define <8 x i8> @test_vqtbl1_u8(<16 x i8> %a, <8 x i8> %b) #1 {
// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b) #3
// CHECK: ret <8 x i8> [[VTBL1_I]]
uint8x8_t test_vqtbl1_u8(uint8x16_t a, uint8x8_t b) {
return vqtbl1_u8(a, b);
@ -534,7 +534,7 @@ uint8x8_t test_vqtbl1_u8(uint8x16_t a, uint8x8_t b) {
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2
// CHECK: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #3
// CHECK: ret <8 x i8> [[VTBL13_I]]
uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) {
return vtbl2_u8(a, b);
@ -555,7 +555,7 @@ uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) {
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #2
// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #3
// CHECK: ret <8 x i8> [[VTBL2_I]]
uint8x8_t test_vqtbl2_u8(uint8x16x2_t a, uint8x8_t b) {
return vqtbl2_u8(a, b);
@ -581,7 +581,7 @@ uint8x8_t test_vqtbl2_u8(uint8x16x2_t a, uint8x8_t b) {
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #2
// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #3
// CHECK: ret <8 x i8> [[VTBL26_I]]
uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) {
return vtbl3_u8(a, b);
@ -605,7 +605,7 @@ uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) {
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #2
// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #3
// CHECK: ret <8 x i8> [[VTBL3_I]]
uint8x8_t test_vqtbl3_u8(uint8x16x3_t a, uint8x8_t b) {
return vqtbl3_u8(a, b);
@ -634,7 +634,7 @@ uint8x8_t test_vqtbl3_u8(uint8x16x3_t a, uint8x8_t b) {
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
// CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #2
// CHECK: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #3
// CHECK: ret <8 x i8> [[VTBL28_I]]
uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) {
return vtbl4_u8(a, b);
@ -661,20 +661,20 @@ uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) {
// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #2
// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #3
// CHECK: ret <8 x i8> [[VTBL4_I]]
uint8x8_t test_vqtbl4_u8(uint8x16x4_t a, uint8x8_t b) {
return vqtbl4_u8(a, b);
}
// CHECK-LABEL: define <16 x i8> @test_vqtbl1q_u8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> %b) #2
// CHECK-LABEL: define <16 x i8> @test_vqtbl1q_u8(<16 x i8> %a, <16 x i8> %b) #1 {
// CHECK: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> %b) #3
// CHECK: ret <16 x i8> [[VTBL1_I]]
uint8x16_t test_vqtbl1q_u8(uint8x16_t a, uint8x16_t b) {
return vqtbl1q_u8(a, b);
}
// CHECK-LABEL: define <16 x i8> @test_vqtbl2q_u8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
// CHECK-LABEL: define <16 x i8> @test_vqtbl2q_u8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
// CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[A:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[A]], i32 0, i32 0
@ -689,13 +689,13 @@ uint8x16_t test_vqtbl1q_u8(uint8x16_t a, uint8x16_t b) {
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #2
// CHECK: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #3
// CHECK: ret <16 x i8> [[VTBL2_I]]
uint8x16_t test_vqtbl2q_u8(uint8x16x2_t a, uint8x16_t b) {
return vqtbl2q_u8(a, b);
}
// CHECK-LABEL: define <16 x i8> @test_vqtbl3q_u8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
// CHECK-LABEL: define <16 x i8> @test_vqtbl3q_u8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
// CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK: [[A:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[A]], i32 0, i32 0
@ -713,13 +713,13 @@ uint8x16_t test_vqtbl2q_u8(uint8x16x2_t a, uint8x16_t b) {
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #2
// CHECK: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #3
// CHECK: ret <16 x i8> [[VTBL3_I]]
uint8x16_t test_vqtbl3q_u8(uint8x16x3_t a, uint8x16_t b) {
return vqtbl3q_u8(a, b);
}
// CHECK-LABEL: define <16 x i8> @test_vqtbl4q_u8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
// CHECK-LABEL: define <16 x i8> @test_vqtbl4q_u8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
// CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK: [[A:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[A]], i32 0, i32 0
@ -740,7 +740,7 @@ uint8x16_t test_vqtbl3q_u8(uint8x16x3_t a, uint8x16_t b) {
// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
// CHECK: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #2
// CHECK: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #3
// CHECK: ret <16 x i8> [[VTBL4_I]]
uint8x16_t test_vqtbl4q_u8(uint8x16x4_t a, uint8x16_t b) {
return vqtbl4q_u8(a, b);
@ -748,7 +748,7 @@ uint8x16_t test_vqtbl4q_u8(uint8x16x4_t a, uint8x16_t b) {
// CHECK-LABEL: define <8 x i8> @test_vtbx1_u8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %c) #2
// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %c) #3
// CHECK: [[TMP0:%.*]] = icmp uge <8 x i8> %c, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
// CHECK: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], %a
@ -776,7 +776,7 @@ uint8x8_t test_vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> [[VTBX1_I]], <8 x i8> %c) #2
// CHECK: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> [[VTBX1_I]], <8 x i8> %c) #3
// CHECK: ret <8 x i8> [[VTBX13_I]]
uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) {
return vtbx2_u8(a, b, c);
@ -802,7 +802,7 @@ uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) {
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %c) #2
// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %c) #3
// CHECK: [[TMP4:%.*]] = icmp uge <8 x i8> %c, <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
// CHECK: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8>
// CHECK: [[TMP6:%.*]] = and <8 x i8> [[TMP5]], %a
@ -837,14 +837,14 @@ uint8x8_t test_vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c) {
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
// CHECK: [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> %c) #2
// CHECK: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> %c) #3
// CHECK: ret <8 x i8> [[VTBX28_I]]
uint8x8_t test_vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c) {
return vtbx4_u8(a, b, c);
}
// CHECK-LABEL: define <8 x i8> @test_vqtbx1_u8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #2
// CHECK-LABEL: define <8 x i8> @test_vqtbx1_u8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #1 {
// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #3
// CHECK: ret <8 x i8> [[VTBX1_I]]
uint8x8_t test_vqtbx1_u8(uint8x8_t a, uint8x16_t b, uint8x8_t c) {
return vqtbx1_u8(a, b, c);
@ -865,7 +865,7 @@ uint8x8_t test_vqtbx1_u8(uint8x8_t a, uint8x16_t b, uint8x8_t c) {
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %c) #2
// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %c) #3
// CHECK: ret <8 x i8> [[VTBX2_I]]
uint8x8_t test_vqtbx2_u8(uint8x8_t a, uint8x16x2_t b, uint8x8_t c) {
return vqtbx2_u8(a, b, c);
@ -889,7 +889,7 @@ uint8x8_t test_vqtbx2_u8(uint8x8_t a, uint8x16x2_t b, uint8x8_t c) {
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %c) #2
// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %c) #3
// CHECK: ret <8 x i8> [[VTBX3_I]]
uint8x8_t test_vqtbx3_u8(uint8x8_t a, uint8x16x3_t b, uint8x8_t c) {
return vqtbx3_u8(a, b, c);
@ -916,20 +916,20 @@ uint8x8_t test_vqtbx3_u8(uint8x8_t a, uint8x16x3_t b, uint8x8_t c) {
// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %c) #2
// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %c) #3
// CHECK: ret <8 x i8> [[VTBX4_I]]
uint8x8_t test_vqtbx4_u8(uint8x8_t a, uint8x16x4_t b, uint8x8_t c) {
return vqtbx4_u8(a, b, c);
}
// CHECK-LABEL: define <16 x i8> @test_vqtbx1q_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #2
// CHECK-LABEL: define <16 x i8> @test_vqtbx1q_u8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #1 {
// CHECK: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #3
// CHECK: ret <16 x i8> [[VTBX1_I]]
uint8x16_t test_vqtbx1q_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
return vqtbx1q_u8(a, b, c);
}
// CHECK-LABEL: define <16 x i8> @test_vqtbx2q_u8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
// CHECK-LABEL: define <16 x i8> @test_vqtbx2q_u8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
// CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0
@ -944,13 +944,13 @@ uint8x16_t test_vqtbx1q_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %c) #2
// CHECK: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %c) #3
// CHECK: ret <16 x i8> [[VTBX2_I]]
uint8x16_t test_vqtbx2q_u8(uint8x16_t a, uint8x16x2_t b, uint8x16_t c) {
return vqtbx2q_u8(a, b, c);
}
// CHECK-LABEL: define <16 x i8> @test_vqtbx3q_u8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
// CHECK-LABEL: define <16 x i8> @test_vqtbx3q_u8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
// CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
@ -968,13 +968,13 @@ uint8x16_t test_vqtbx2q_u8(uint8x16_t a, uint8x16x2_t b, uint8x16_t c) {
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %c) #2
// CHECK: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %c) #3
// CHECK: ret <16 x i8> [[VTBX3_I]]
uint8x16_t test_vqtbx3q_u8(uint8x16_t a, uint8x16x3_t b, uint8x16_t c) {
return vqtbx3q_u8(a, b, c);
}
// CHECK-LABEL: define <16 x i8> @test_vqtbx4q_u8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
// CHECK-LABEL: define <16 x i8> @test_vqtbx4q_u8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
// CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
@ -995,7 +995,7 @@ uint8x16_t test_vqtbx3q_u8(uint8x16_t a, uint8x16x3_t b, uint8x16_t c) {
// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
// CHECK: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %c) #2
// CHECK: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %c) #3
// CHECK: ret <16 x i8> [[VTBX4_I]]
uint8x16_t test_vqtbx4q_u8(uint8x16_t a, uint8x16x4_t b, uint8x16_t c) {
return vqtbx4q_u8(a, b, c);
@ -1003,14 +1003,14 @@ uint8x16_t test_vqtbx4q_u8(uint8x16_t a, uint8x16x4_t b, uint8x16_t c) {
// CHECK-LABEL: define <8 x i8> @test_vtbl1_p8(<8 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2
// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #3
// CHECK: ret <8 x i8> [[VTBL11_I]]
poly8x8_t test_vtbl1_p8(poly8x8_t a, uint8x8_t b) {
return vtbl1_p8(a, b);
}
// CHECK-LABEL: define <8 x i8> @test_vqtbl1_p8(<16 x i8> %a, <8 x i8> %b) #0 {
// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b) #2
// CHECK-LABEL: define <8 x i8> @test_vqtbl1_p8(<16 x i8> %a, <8 x i8> %b) #1 {
// CHECK: [[VTBL1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b) #3
// CHECK: ret <8 x i8> [[VTBL1_I]]
poly8x8_t test_vqtbl1_p8(poly8x16_t a, uint8x8_t b) {
return vqtbl1_p8(a, b);
@ -1032,7 +1032,7 @@ poly8x8_t test_vqtbl1_p8(poly8x16_t a, uint8x8_t b) {
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #2
// CHECK: [[VTBL13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %b) #3
// CHECK: ret <8 x i8> [[VTBL13_I]]
poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) {
return vtbl2_p8(a, b);
@ -1053,7 +1053,7 @@ poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) {
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #2
// CHECK: [[VTBL2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %b) #3
// CHECK: ret <8 x i8> [[VTBL2_I]]
poly8x8_t test_vqtbl2_p8(poly8x16x2_t a, uint8x8_t b) {
return vqtbl2_p8(a, b);
@ -1079,7 +1079,7 @@ poly8x8_t test_vqtbl2_p8(poly8x16x2_t a, uint8x8_t b) {
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #2
// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %b) #3
// CHECK: ret <8 x i8> [[VTBL26_I]]
poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) {
return vtbl3_p8(a, b);
@ -1103,7 +1103,7 @@ poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) {
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #2
// CHECK: [[VTBL3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %b) #3
// CHECK: ret <8 x i8> [[VTBL3_I]]
poly8x8_t test_vqtbl3_p8(poly8x16x3_t a, uint8x8_t b) {
return vqtbl3_p8(a, b);
@ -1132,7 +1132,7 @@ poly8x8_t test_vqtbl3_p8(poly8x16x3_t a, uint8x8_t b) {
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
// CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBL27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #2
// CHECK: [[VTBL28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL27_I]], <8 x i8> %b) #3
// CHECK: ret <8 x i8> [[VTBL28_I]]
poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) {
return vtbl4_p8(a, b);
@ -1159,20 +1159,20 @@ poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) {
// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #2
// CHECK: [[VTBL4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %b) #3
// CHECK: ret <8 x i8> [[VTBL4_I]]
poly8x8_t test_vqtbl4_p8(poly8x16x4_t a, uint8x8_t b) {
return vqtbl4_p8(a, b);
}
// CHECK-LABEL: define <16 x i8> @test_vqtbl1q_p8(<16 x i8> %a, <16 x i8> %b) #0 {
// CHECK: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> %b) #2
// CHECK-LABEL: define <16 x i8> @test_vqtbl1q_p8(<16 x i8> %a, <16 x i8> %b) #1 {
// CHECK: [[VTBL1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %a, <16 x i8> %b) #3
// CHECK: ret <16 x i8> [[VTBL1_I]]
poly8x16_t test_vqtbl1q_p8(poly8x16_t a, uint8x16_t b) {
return vqtbl1q_p8(a, b);
}
// CHECK-LABEL: define <16 x i8> @test_vqtbl2q_p8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
// CHECK-LABEL: define <16 x i8> @test_vqtbl2q_p8([2 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
// CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[A:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[A]], i32 0, i32 0
@ -1187,13 +1187,13 @@ poly8x16_t test_vqtbl1q_p8(poly8x16_t a, uint8x16_t b) {
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #2
// CHECK: [[VTBL2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %b) #3
// CHECK: ret <16 x i8> [[VTBL2_I]]
poly8x16_t test_vqtbl2q_p8(poly8x16x2_t a, uint8x16_t b) {
return vqtbl2q_p8(a, b);
}
// CHECK-LABEL: define <16 x i8> @test_vqtbl3q_p8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
// CHECK-LABEL: define <16 x i8> @test_vqtbl3q_p8([3 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
// CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK: [[A:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[A]], i32 0, i32 0
@ -1211,13 +1211,13 @@ poly8x16_t test_vqtbl2q_p8(poly8x16x2_t a, uint8x16_t b) {
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #2
// CHECK: [[VTBL3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %b) #3
// CHECK: ret <16 x i8> [[VTBL3_I]]
poly8x16_t test_vqtbl3q_p8(poly8x16x3_t a, uint8x16_t b) {
return vqtbl3q_p8(a, b);
}
// CHECK-LABEL: define <16 x i8> @test_vqtbl4q_p8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) #0 {
// CHECK-LABEL: define <16 x i8> @test_vqtbl4q_p8([4 x <16 x i8>] %a.coerce, <16 x i8> %b) #1 {
// CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK: [[A:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[A]], i32 0, i32 0
@ -1238,7 +1238,7 @@ poly8x16_t test_vqtbl3q_p8(poly8x16x3_t a, uint8x16_t b) {
// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P0_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
// CHECK: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #2
// CHECK: [[VTBL4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %b) #3
// CHECK: ret <16 x i8> [[VTBL4_I]]
poly8x16_t test_vqtbl4q_p8(poly8x16x4_t a, uint8x16_t b) {
return vqtbl4q_p8(a, b);
@ -1246,7 +1246,7 @@ poly8x16_t test_vqtbl4q_p8(poly8x16x4_t a, uint8x16_t b) {
// CHECK-LABEL: define <8 x i8> @test_vtbx1_p8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VTBL1_I:%.*]] = shufflevector <8 x i8> %b, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %c) #2
// CHECK: [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> %c) #3
// CHECK: [[TMP0:%.*]] = icmp uge <8 x i8> %c, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
// CHECK: [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
// CHECK: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], %a
@ -1274,7 +1274,7 @@ poly8x8_t test_vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c) {
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK: [[VTBX1_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> [[VTBX1_I]], <8 x i8> %c) #2
// CHECK: [[VTBX13_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> [[VTBX1_I]], <8 x i8> %c) #3
// CHECK: ret <8 x i8> [[VTBX13_I]]
poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) {
return vtbx2_p8(a, b, c);
@ -1300,7 +1300,7 @@ poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) {
// CHECK: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK: [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %c) #2
// CHECK: [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> %c) #3
// CHECK: [[TMP4:%.*]] = icmp uge <8 x i8> %c, <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
// CHECK: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8>
// CHECK: [[TMP6:%.*]] = and <8 x i8> [[TMP5]], %a
@ -1335,14 +1335,14 @@ poly8x8_t test_vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c) {
// CHECK: [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
// CHECK: [[VTBX2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBX27_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> %c) #2
// CHECK: [[VTBX28_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[VTBX2_I]], <16 x i8> [[VTBX27_I]], <8 x i8> %c) #3
// CHECK: ret <8 x i8> [[VTBX28_I]]
poly8x8_t test_vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c) {
return vtbx4_p8(a, b, c);
}
// CHECK-LABEL: define <8 x i8> @test_vqtbx1_p8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #0 {
// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #2
// CHECK-LABEL: define <8 x i8> @test_vqtbx1_p8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #1 {
// CHECK: [[VTBX1_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c) #3
// CHECK: ret <8 x i8> [[VTBX1_I]]
poly8x8_t test_vqtbx1_p8(poly8x8_t a, uint8x16_t b, uint8x8_t c) {
return vqtbx1_p8(a, b, c);
@ -1363,7 +1363,7 @@ poly8x8_t test_vqtbx1_p8(poly8x8_t a, uint8x16_t b, uint8x8_t c) {
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %c) #2
// CHECK: [[VTBX2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <8 x i8> %c) #3
// CHECK: ret <8 x i8> [[VTBX2_I]]
poly8x8_t test_vqtbx2_p8(poly8x8_t a, poly8x16x2_t b, uint8x8_t c) {
return vqtbx2_p8(a, b, c);
@ -1387,7 +1387,7 @@ poly8x8_t test_vqtbx2_p8(poly8x8_t a, poly8x16x2_t b, uint8x8_t c) {
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %c) #2
// CHECK: [[VTBX3_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <8 x i8> %c) #3
// CHECK: ret <8 x i8> [[VTBX3_I]]
poly8x8_t test_vqtbx3_p8(poly8x8_t a, poly8x16x3_t b, uint8x8_t c) {
return vqtbx3_p8(a, b, c);
@ -1414,20 +1414,20 @@ poly8x8_t test_vqtbx3_p8(poly8x8_t a, poly8x16x3_t b, uint8x8_t c) {
// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %c) #2
// CHECK: [[VTBX4_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <8 x i8> %c) #3
// CHECK: ret <8 x i8> [[VTBX4_I]]
poly8x8_t test_vqtbx4_p8(poly8x8_t a, poly8x16x4_t b, uint8x8_t c) {
return vqtbx4_p8(a, b, c);
}
// CHECK-LABEL: define <16 x i8> @test_vqtbx1q_p8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
// CHECK: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #2
// CHECK-LABEL: define <16 x i8> @test_vqtbx1q_p8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #1 {
// CHECK: [[VTBX1_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #3
// CHECK: ret <16 x i8> [[VTBX1_I]]
poly8x16_t test_vqtbx1q_p8(poly8x16_t a, uint8x16_t b, uint8x16_t c) {
return vqtbx1q_p8(a, b, c);
}
// CHECK-LABEL: define <16 x i8> @test_vqtbx2q_p8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
// CHECK-LABEL: define <16 x i8> @test_vqtbx2q_p8(<16 x i8> %a, [2 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
// CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
@ -1442,13 +1442,13 @@ poly8x16_t test_vqtbx1q_p8(poly8x16_t a, uint8x16_t b, uint8x16_t c) {
// CHECK: [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1_I]], i64 0, i64 1
// CHECK: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2_I]], align 16
// CHECK: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %c) #2
// CHECK: [[VTBX2_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> %c) #3
// CHECK: ret <16 x i8> [[VTBX2_I]]
poly8x16_t test_vqtbx2q_p8(poly8x16_t a, poly8x16x2_t b, uint8x16_t c) {
return vqtbx2q_p8(a, b, c);
}
// CHECK-LABEL: define <16 x i8> @test_vqtbx3q_p8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
// CHECK-LABEL: define <16 x i8> @test_vqtbx3q_p8(<16 x i8> %a, [3 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
// CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
@ -1466,13 +1466,13 @@ poly8x16_t test_vqtbx2q_p8(poly8x16_t a, poly8x16x2_t b, uint8x16_t c) {
// CHECK: [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3_I]], i64 0, i64 2
// CHECK: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4_I]], align 16
// CHECK: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %c) #2
// CHECK: [[VTBX3_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> %c) #3
// CHECK: ret <16 x i8> [[VTBX3_I]]
poly8x16_t test_vqtbx3q_p8(poly8x16_t a, poly8x16x3_t b, uint8x16_t c) {
return vqtbx3q_p8(a, b, c);
}
// CHECK-LABEL: define <16 x i8> @test_vqtbx4q_p8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) #0 {
// CHECK-LABEL: define <16 x i8> @test_vqtbx4q_p8(<16 x i8> %a, [4 x <16 x i8>] %b.coerce, <16 x i8> %c) #1 {
// CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
@ -1493,8 +1493,11 @@ poly8x16_t test_vqtbx3q_p8(poly8x16_t a, poly8x16x3_t b, uint8x16_t c) {
// CHECK: [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__P1_I]], i32 0, i32 0
// CHECK: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5_I]], i64 0, i64 3
// CHECK: [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6_I]], align 16
// CHECK: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %c) #2
// CHECK: [[VTBX4_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %a, <16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> %c) #3
// CHECK: ret <16 x i8> [[VTBX4_I]]
poly8x16_t test_vqtbx4q_p8(poly8x16_t a, poly8x16x4_t b, uint8x16_t c) {
return vqtbx4q_p8(a, b, c);
}
// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"

View File

@ -97,14 +97,14 @@ float32_t test_vget_lane_f16(float16x4_t a) {
return vget_lane_f16(a, 1);
}
// CHECK-LABEL: define i8 @test_vgetq_lane_u8(<16 x i8> %a) #0 {
// CHECK-LABEL: define i8 @test_vgetq_lane_u8(<16 x i8> %a) #1 {
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK: ret i8 [[VGETQ_LANE]]
uint8_t test_vgetq_lane_u8(uint8x16_t a) {
return vgetq_lane_u8(a, 15);
}
// CHECK-LABEL: define i16 @test_vgetq_lane_u16(<8 x i16> %a) #0 {
// CHECK-LABEL: define i16 @test_vgetq_lane_u16(<8 x i16> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@ -113,7 +113,7 @@ uint16_t test_vgetq_lane_u16(uint16x8_t a) {
return vgetq_lane_u16(a, 7);
}
// CHECK-LABEL: define i32 @test_vgetq_lane_u32(<4 x i32> %a) #0 {
// CHECK-LABEL: define i32 @test_vgetq_lane_u32(<4 x i32> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@ -122,14 +122,14 @@ uint32_t test_vgetq_lane_u32(uint32x4_t a) {
return vgetq_lane_u32(a, 3);
}
// CHECK-LABEL: define i8 @test_vgetq_lane_s8(<16 x i8> %a) #0 {
// CHECK-LABEL: define i8 @test_vgetq_lane_s8(<16 x i8> %a) #1 {
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK: ret i8 [[VGETQ_LANE]]
int8_t test_vgetq_lane_s8(int8x16_t a) {
return vgetq_lane_s8(a, 15);
}
// CHECK-LABEL: define i16 @test_vgetq_lane_s16(<8 x i16> %a) #0 {
// CHECK-LABEL: define i16 @test_vgetq_lane_s16(<8 x i16> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@ -138,7 +138,7 @@ int16_t test_vgetq_lane_s16(int16x8_t a) {
return vgetq_lane_s16(a, 7);
}
// CHECK-LABEL: define i32 @test_vgetq_lane_s32(<4 x i32> %a) #0 {
// CHECK-LABEL: define i32 @test_vgetq_lane_s32(<4 x i32> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
@ -147,14 +147,14 @@ int32_t test_vgetq_lane_s32(int32x4_t a) {
return vgetq_lane_s32(a, 3);
}
// CHECK-LABEL: define i8 @test_vgetq_lane_p8(<16 x i8> %a) #0 {
// CHECK-LABEL: define i8 @test_vgetq_lane_p8(<16 x i8> %a) #1 {
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK: ret i8 [[VGETQ_LANE]]
poly8_t test_vgetq_lane_p8(poly8x16_t a) {
return vgetq_lane_p8(a, 15);
}
// CHECK-LABEL: define i16 @test_vgetq_lane_p16(<8 x i16> %a) #0 {
// CHECK-LABEL: define i16 @test_vgetq_lane_p16(<8 x i16> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7
@ -163,7 +163,7 @@ poly16_t test_vgetq_lane_p16(poly16x8_t a) {
return vgetq_lane_p16(a, 7);
}
// CHECK-LABEL: define float @test_vgetq_lane_f32(<4 x float> %a) #0 {
// CHECK-LABEL: define float @test_vgetq_lane_f32(<4 x float> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
@ -172,7 +172,7 @@ float32_t test_vgetq_lane_f32(float32x4_t a) {
return vgetq_lane_f32(a, 3);
}
// CHECK-LABEL: define float @test_vgetq_lane_f16(<8 x half> %a) #0 {
// CHECK-LABEL: define float @test_vgetq_lane_f16(<8 x half> %a) #1 {
// CHECK: [[__REINT_244:%.*]] = alloca <8 x half>, align 16
// CHECK: [[__REINT1_244:%.*]] = alloca i16, align 2
// CHECK: store <8 x half> %a, <8 x half>* [[__REINT_244]], align 16
@ -208,7 +208,7 @@ uint64_t test_vget_lane_u64(uint64x1_t a) {
return vget_lane_u64(a, 0);
}
// CHECK-LABEL: define i64 @test_vgetq_lane_s64(<2 x i64> %a) #0 {
// CHECK-LABEL: define i64 @test_vgetq_lane_s64(<2 x i64> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
@ -217,7 +217,7 @@ int64_t test_vgetq_lane_s64(int64x2_t a) {
return vgetq_lane_s64(a, 1);
}
// CHECK-LABEL: define i64 @test_vgetq_lane_u64(<2 x i64> %a) #0 {
// CHECK-LABEL: define i64 @test_vgetq_lane_u64(<2 x i64> %a) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
@ -324,14 +324,14 @@ float16x4_t test_vset_lane_f16(float16_t *a, float16x4_t b) {
return vset_lane_f16(*a, b, 3);
}
// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_u8(i8 %a, <16 x i8> %b) #0 {
// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_u8(i8 %a, <16 x i8> %b) #1 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK: ret <16 x i8> [[VSET_LANE]]
uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b) {
return vsetq_lane_u8(a, b, 15);
}
// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_u16(i16 %a, <8 x i16> %b) #0 {
// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_u16(i16 %a, <8 x i16> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
@ -340,7 +340,7 @@ uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) {
return vsetq_lane_u16(a, b, 7);
}
// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_u32(i32 %a, <4 x i32> %b) #0 {
// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_u32(i32 %a, <4 x i32> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3
@ -349,14 +349,14 @@ uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) {
return vsetq_lane_u32(a, b, 3);
}
// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_s8(i8 %a, <16 x i8> %b) #0 {
// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_s8(i8 %a, <16 x i8> %b) #1 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK: ret <16 x i8> [[VSET_LANE]]
int8x16_t test_vsetq_lane_s8(int8_t a, int8x16_t b) {
return vsetq_lane_s8(a, b, 15);
}
// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_s16(i16 %a, <8 x i16> %b) #0 {
// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_s16(i16 %a, <8 x i16> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
@ -365,7 +365,7 @@ int16x8_t test_vsetq_lane_s16(int16_t a, int16x8_t b) {
return vsetq_lane_s16(a, b, 7);
}
// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_s32(i32 %a, <4 x i32> %b) #0 {
// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_s32(i32 %a, <4 x i32> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3
@ -374,14 +374,14 @@ int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b) {
return vsetq_lane_s32(a, b, 3);
}
// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_p8(i8 %a, <16 x i8> %b) #0 {
// CHECK-LABEL: define <16 x i8> @test_vsetq_lane_p8(i8 %a, <16 x i8> %b) #1 {
// CHECK: [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK: ret <16 x i8> [[VSET_LANE]]
poly8x16_t test_vsetq_lane_p8(poly8_t a, poly8x16_t b) {
return vsetq_lane_p8(a, b, 15);
}
// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_p16(i16 %a, <8 x i16> %b) #0 {
// CHECK-LABEL: define <8 x i16> @test_vsetq_lane_p16(i16 %a, <8 x i16> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7
@ -390,7 +390,7 @@ poly16x8_t test_vsetq_lane_p16(poly16_t a, poly16x8_t b) {
return vsetq_lane_p16(a, b, 7);
}
// CHECK-LABEL: define <4 x float> @test_vsetq_lane_f32(float %a, <4 x float> %b) #0 {
// CHECK-LABEL: define <4 x float> @test_vsetq_lane_f32(float %a, <4 x float> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x float> [[TMP1]], float %a, i32 3
@ -399,7 +399,7 @@ float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) {
return vsetq_lane_f32(a, b, 3);
}
// CHECK-LABEL: define <8 x half> @test_vsetq_lane_f16(half* %a, <8 x half> %b) #0 {
// CHECK-LABEL: define <8 x half> @test_vsetq_lane_f16(half* %a, <8 x half> %b) #1 {
// CHECK: [[__REINT_248:%.*]] = alloca half, align 2
// CHECK: [[__REINT1_248:%.*]] = alloca <8 x half>, align 16
// CHECK: [[__REINT2_248:%.*]] = alloca <8 x i16>, align 16
@ -439,7 +439,7 @@ uint64x1_t test_vset_lane_u64(uint64_t a, uint64x1_t b) {
return vset_lane_u64(a, b, 0);
}
// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_s64(i64 %a, <2 x i64> %b) #0 {
// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_s64(i64 %a, <2 x i64> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
@ -448,7 +448,7 @@ int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) {
return vsetq_lane_s64(a, b, 1);
}
// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_u64(i64 %a, <2 x i64> %b) #0 {
// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_u64(i64 %a, <2 x i64> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
@ -456,3 +456,6 @@ int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) {
uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b) {
return vsetq_lane_u64(a, b, 1);
}
// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"

View File

@ -14,7 +14,7 @@ uint64x1_t test_vceq_p64(poly64x1_t a, poly64x1_t b) {
return vceq_p64(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vceqq_p64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK-LABEL: define <2 x i64> @test_vceqq_p64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %a, %b
// CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[SEXT_I]]
@ -31,7 +31,7 @@ uint64x1_t test_vtst_p64(poly64x1_t a, poly64x1_t b) {
return vtst_p64(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vtstq_p64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK-LABEL: define <2 x i64> @test_vtstq_p64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[TMP4:%.*]] = and <2 x i64> %a, %b
// CHECK: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer
// CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64>
@ -50,7 +50,7 @@ poly64x1_t test_vbsl_p64(poly64x1_t a, poly64x1_t b, poly64x1_t c) {
return vbsl_p64(a, b, c);
}
// CHECK-LABEL: define <2 x i64> @test_vbslq_p64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) #0 {
// CHECK-LABEL: define <2 x i64> @test_vbslq_p64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) #1 {
// CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %a, %b
// CHECK: [[TMP3:%.*]] = xor <2 x i64> %a, <i64 -1, i64 -1>
// CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %c
@ -69,7 +69,7 @@ poly64_t test_vget_lane_p64(poly64x1_t v) {
return vget_lane_p64(v, 0);
}
// CHECK-LABEL: define i64 @test_vgetq_lane_p64(<2 x i64> %v) #0 {
// CHECK-LABEL: define i64 @test_vgetq_lane_p64(<2 x i64> %v) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
@ -87,7 +87,7 @@ poly64x1_t test_vset_lane_p64(poly64_t a, poly64x1_t v) {
return vset_lane_p64(a, v, 0);
}
// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_p64(i64 %a, <2 x i64> %v) #0 {
// CHECK-LABEL: define <2 x i64> @test_vsetq_lane_p64(i64 %a, <2 x i64> %v) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1
@ -109,7 +109,7 @@ poly64x1_t test_vcopy_lane_p64(poly64x1_t a, poly64x1_t b) {
}
// CHECK-LABEL: define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) #0 {
// CHECK-LABEL: define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0
@ -121,7 +121,7 @@ poly64x2_t test_vcopyq_lane_p64(poly64x2_t a, poly64x1_t b) {
return vcopyq_lane_p64(a, 1, b, 0);
}
// CHECK-LABEL: define <2 x i64> @test_vcopyq_laneq_p64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK-LABEL: define <2 x i64> @test_vcopyq_laneq_p64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
@ -146,7 +146,7 @@ poly64x1_t test_vcreate_p64(uint64_t a) {
poly64x1_t test_vdup_n_p64(poly64_t a) {
return vdup_n_p64(a);
}
// CHECK-LABEL: define <2 x i64> @test_vdupq_n_p64(i64 %a) #0 {
// CHECK-LABEL: define <2 x i64> @test_vdupq_n_p64(i64 %a) #1 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
// CHECK: ret <2 x i64> [[VECINIT1_I]]
@ -161,7 +161,7 @@ poly64x1_t test_vmov_n_p64(poly64_t a) {
return vmov_n_p64(a);
}
// CHECK-LABEL: define <2 x i64> @test_vmovq_n_p64(i64 %a) #0 {
// CHECK-LABEL: define <2 x i64> @test_vmovq_n_p64(i64 %a) #1 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
// CHECK: ret <2 x i64> [[VECINIT1_I]]
@ -176,21 +176,21 @@ poly64x1_t test_vdup_lane_p64(poly64x1_t vec) {
return vdup_lane_p64(vec, 0);
}
// CHECK-LABEL: define <2 x i64> @test_vdupq_lane_p64(<1 x i64> %vec) #0 {
// CHECK-LABEL: define <2 x i64> @test_vdupq_lane_p64(<1 x i64> %vec) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x i64> %vec, <1 x i64> %vec, <2 x i32> zeroinitializer
// CHECK: ret <2 x i64> [[SHUFFLE]]
poly64x2_t test_vdupq_lane_p64(poly64x1_t vec) {
return vdupq_lane_p64(vec, 0);
}
// CHECK-LABEL: define <2 x i64> @test_vdupq_laneq_p64(<2 x i64> %vec) #0 {
// CHECK-LABEL: define <2 x i64> @test_vdupq_laneq_p64(<2 x i64> %vec) #1 {
// CHECK: [[SHUFFLE:%.*]] = shufflevector <2 x i64> %vec, <2 x i64> %vec, <2 x i32> <i32 1, i32 1>
// CHECK: ret <2 x i64> [[SHUFFLE]]
poly64x2_t test_vdupq_laneq_p64(poly64x2_t vec) {
return vdupq_laneq_p64(vec, 1);
}
// CHECK-LABEL: define <2 x i64> @test_vcombine_p64(<1 x i64> %low, <1 x i64> %high) #0 {
// CHECK-LABEL: define <2 x i64> @test_vcombine_p64(<1 x i64> %low, <1 x i64> %high) #1 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %low, <1 x i64> %high, <2 x i32> <i32 0, i32 1>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
poly64x2_t test_vcombine_p64(poly64x1_t low, poly64x1_t high) {
@ -206,7 +206,7 @@ poly64x1_t test_vld1_p64(poly64_t const * ptr) {
return vld1_p64(ptr);
}
// CHECK-LABEL: define <2 x i64> @test_vld1q_p64(i64* %ptr) #0 {
// CHECK-LABEL: define <2 x i64> @test_vld1q_p64(i64* %ptr) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
// CHECK: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
@ -226,7 +226,7 @@ void test_vst1_p64(poly64_t * ptr, poly64x1_t val) {
return vst1_p64(ptr, val);
}
// CHECK-LABEL: define void @test_vst1q_p64(i64* %ptr, <2 x i64> %val) #0 {
// CHECK-LABEL: define void @test_vst1q_p64(i64* %ptr, <2 x i64> %val) #1 {
// CHECK: [[TMP0:%.*]] = bitcast i64* %ptr to i8*
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %val to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
@ -237,7 +237,7 @@ void test_vst1q_p64(poly64_t * ptr, poly64x2_t val) {
return vst1q_p64(ptr, val);
}
// CHECK-LABEL: define %struct.poly64x1x2_t @test_vld2_p64(i64* %ptr) #0 {
// CHECK-LABEL: define %struct.poly64x1x2_t @test_vld2_p64(i64* %ptr) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
@ -255,7 +255,7 @@ poly64x1x2_t test_vld2_p64(poly64_t const * ptr) {
return vld2_p64(ptr);
}
// CHECK-LABEL: define %struct.poly64x2x2_t @test_vld2q_p64(i64* %ptr) #0 {
// CHECK-LABEL: define %struct.poly64x2x2_t @test_vld2q_p64(i64* %ptr) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
@ -273,7 +273,7 @@ poly64x2x2_t test_vld2q_p64(poly64_t const * ptr) {
return vld2q_p64(ptr);
}
// CHECK-LABEL: define %struct.poly64x1x3_t @test_vld3_p64(i64* %ptr) #0 {
// CHECK-LABEL: define %struct.poly64x1x3_t @test_vld3_p64(i64* %ptr) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
@ -291,7 +291,7 @@ poly64x1x3_t test_vld3_p64(poly64_t const * ptr) {
return vld3_p64(ptr);
}
// CHECK-LABEL: define %struct.poly64x2x3_t @test_vld3q_p64(i64* %ptr) #0 {
// CHECK-LABEL: define %struct.poly64x2x3_t @test_vld3q_p64(i64* %ptr) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
@ -309,7 +309,7 @@ poly64x2x3_t test_vld3q_p64(poly64_t const * ptr) {
return vld3q_p64(ptr);
}
// CHECK-LABEL: define %struct.poly64x1x4_t @test_vld4_p64(i64* %ptr) #0 {
// CHECK-LABEL: define %struct.poly64x1x4_t @test_vld4_p64(i64* %ptr) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
@ -327,7 +327,7 @@ poly64x1x4_t test_vld4_p64(poly64_t const * ptr) {
return vld4_p64(ptr);
}
// CHECK-LABEL: define %struct.poly64x2x4_t @test_vld4q_p64(i64* %ptr) #0 {
// CHECK-LABEL: define %struct.poly64x2x4_t @test_vld4q_p64(i64* %ptr) #2 {
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
@ -345,7 +345,7 @@ poly64x2x4_t test_vld4q_p64(poly64_t const * ptr) {
return vld4q_p64(ptr);
}
// CHECK-LABEL: define void @test_vst2_p64(i64* %ptr, [2 x <1 x i64>] %val.coerce) #0 {
// CHECK-LABEL: define void @test_vst2_p64(i64* %ptr, [2 x <1 x i64>] %val.coerce) #2 {
// CHECK: [[VAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[VAL]], i32 0, i32 0
@ -370,7 +370,7 @@ void test_vst2_p64(poly64_t * ptr, poly64x1x2_t val) {
return vst2_p64(ptr, val);
}
// CHECK-LABEL: define void @test_vst2q_p64(i64* %ptr, [2 x <2 x i64>] %val.coerce) #0 {
// CHECK-LABEL: define void @test_vst2q_p64(i64* %ptr, [2 x <2 x i64>] %val.coerce) #2 {
// CHECK: [[VAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[VAL]], i32 0, i32 0
@ -395,7 +395,7 @@ void test_vst2q_p64(poly64_t * ptr, poly64x2x2_t val) {
return vst2q_p64(ptr, val);
}
// CHECK-LABEL: define void @test_vst3_p64(i64* %ptr, [3 x <1 x i64>] %val.coerce) #0 {
// CHECK-LABEL: define void @test_vst3_p64(i64* %ptr, [3 x <1 x i64>] %val.coerce) #2 {
// CHECK: [[VAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[VAL]], i32 0, i32 0
@ -425,7 +425,7 @@ void test_vst3_p64(poly64_t * ptr, poly64x1x3_t val) {
return vst3_p64(ptr, val);
}
// CHECK-LABEL: define void @test_vst3q_p64(i64* %ptr, [3 x <2 x i64>] %val.coerce) #0 {
// CHECK-LABEL: define void @test_vst3q_p64(i64* %ptr, [3 x <2 x i64>] %val.coerce) #2 {
// CHECK: [[VAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[VAL]], i32 0, i32 0
@ -455,7 +455,7 @@ void test_vst3q_p64(poly64_t * ptr, poly64x2x3_t val) {
return vst3q_p64(ptr, val);
}
// CHECK-LABEL: define void @test_vst4_p64(i64* %ptr, [4 x <1 x i64>] %val.coerce) #0 {
// CHECK-LABEL: define void @test_vst4_p64(i64* %ptr, [4 x <1 x i64>] %val.coerce) #2 {
// CHECK: [[VAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[VAL]], i32 0, i32 0
@ -490,7 +490,7 @@ void test_vst4_p64(poly64_t * ptr, poly64x1x4_t val) {
return vst4_p64(ptr, val);
}
// CHECK-LABEL: define void @test_vst4q_p64(i64* %ptr, [4 x <2 x i64>] %val.coerce) #0 {
// CHECK-LABEL: define void @test_vst4q_p64(i64* %ptr, [4 x <2 x i64>] %val.coerce) #2 {
// CHECK: [[VAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[VAL]], i32 0, i32 0
@ -537,7 +537,7 @@ poly64x1_t test_vext_p64(poly64x1_t a, poly64x1_t b) {
}
// CHECK-LABEL: define <2 x i64> @test_vextq_p64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK-LABEL: define <2 x i64> @test_vextq_p64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
@ -548,42 +548,42 @@ poly64x2_t test_vextq_p64(poly64x2_t a, poly64x2_t b) {
return vextq_p64(a, b, 1);
}
// CHECK-LABEL: define <2 x i64> @test_vzip1q_p64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK-LABEL: define <2 x i64> @test_vzip1q_p64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
poly64x2_t test_vzip1q_p64(poly64x2_t a, poly64x2_t b) {
return vzip1q_p64(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vzip2q_p64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK-LABEL: define <2 x i64> @test_vzip2q_p64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
poly64x2_t test_vzip2q_p64(poly64x2_t a, poly64x2_t b) {
return vzip2q_p64(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vuzp1q_p64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK-LABEL: define <2 x i64> @test_vuzp1q_p64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
poly64x2_t test_vuzp1q_p64(poly64x2_t a, poly64x2_t b) {
return vuzp1q_p64(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vuzp2q_p64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK-LABEL: define <2 x i64> @test_vuzp2q_p64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
poly64x2_t test_vuzp2q_p64(poly64x2_t a, poly64x2_t b) {
return vuzp2q_p64(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vtrn1q_p64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK-LABEL: define <2 x i64> @test_vtrn1q_p64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
poly64x2_t test_vtrn1q_p64(poly64x2_t a, poly64x2_t b) {
return vtrn1q_p64(a, b);
}
// CHECK-LABEL: define <2 x i64> @test_vtrn2q_p64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK-LABEL: define <2 x i64> @test_vtrn2q_p64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
// CHECK: ret <2 x i64> [[SHUFFLE_I]]
poly64x2_t test_vtrn2q_p64(poly64x2_t a, poly64x2_t b) {
@ -601,7 +601,7 @@ poly64x1_t test_vsri_n_p64(poly64x1_t a, poly64x1_t b) {
return vsri_n_p64(a, b, 33);
}
// CHECK-LABEL: define <2 x i64> @test_vsriq_n_p64(<2 x i64> %a, <2 x i64> %b) #0 {
// CHECK-LABEL: define <2 x i64> @test_vsriq_n_p64(<2 x i64> %a, <2 x i64> %b) #1 {
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
@ -612,3 +612,6 @@ poly64x2_t test_vsriq_n_p64(poly64x2_t a, poly64x2_t b) {
return vsriq_n_p64(a, b, 64);
}
// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"
// CHECK-NOT: attributes #2 ={{.*}}"min-legal-vector-width"

View File

@ -8,14 +8,14 @@
#include <arm_neon.h>
// CHECK-LABEL: define <2 x float> @test_fma_order(<2 x float> %accum, <2 x float> %lhs, <2 x float> %rhs) #0 {
// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %lhs, <2 x float> %rhs, <2 x float> %accum) #2
// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %lhs, <2 x float> %rhs, <2 x float> %accum) #3
// CHECK: ret <2 x float> [[TMP6]]
float32x2_t test_fma_order(float32x2_t accum, float32x2_t lhs, float32x2_t rhs) {
return vfma_f32(accum, lhs, rhs);
}
// CHECK-LABEL: define <4 x float> @test_fmaq_order(<4 x float> %accum, <4 x float> %lhs, <4 x float> %rhs) #0 {
// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %lhs, <4 x float> %rhs, <4 x float> %accum) #2
// CHECK-LABEL: define <4 x float> @test_fmaq_order(<4 x float> %accum, <4 x float> %lhs, <4 x float> %rhs) #1 {
// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %lhs, <4 x float> %rhs, <4 x float> %accum) #3
// CHECK: ret <4 x float> [[TMP6]]
float32x4_t test_fmaq_order(float32x4_t accum, float32x4_t lhs, float32x4_t rhs) {
return vfmaq_f32(accum, lhs, rhs);
@ -32,7 +32,7 @@ float32x2_t test_vfma_n_f32(float32x2_t a, float32x2_t b, float32_t n) {
return vfma_n_f32(a, b, n);
}
// CHECK-LABEL: define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) #0 {
// CHECK-LABEL: define <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %n) #1 {
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %n, i32 1
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %n, i32 2
@ -44,3 +44,6 @@ float32x2_t test_vfma_n_f32(float32x2_t a, float32x2_t b, float32_t n) {
float32x4_t test_vfmaq_n_f32(float32x4_t a, float32x4_t b, float32_t n) {
return vfmaq_n_f32(a, b, n);
}
// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"

View File

@ -3,29 +3,32 @@
#include <arm_neon.h>
// CHECK-LABEL: define <2 x float> @test_vmaxnm_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[VMAXNM_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmaxnm.v2f32(<2 x float> %a, <2 x float> %b) #2
// CHECK: [[VMAXNM_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmaxnm.v2f32(<2 x float> %a, <2 x float> %b) #3
// CHECK: ret <2 x float> [[VMAXNM_V2_I]]
float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) {
return vmaxnm_f32(a, b);
}
// CHECK-LABEL: define <4 x float> @test_vmaxnmq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[VMAXNMQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmaxnm.v4f32(<4 x float> %a, <4 x float> %b) #2
// CHECK-LABEL: define <4 x float> @test_vmaxnmq_f32(<4 x float> %a, <4 x float> %b) #1 {
// CHECK: [[VMAXNMQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmaxnm.v4f32(<4 x float> %a, <4 x float> %b) #3
// CHECK: ret <4 x float> [[VMAXNMQ_V2_I]]
float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) {
return vmaxnmq_f32(a, b);
}
// CHECK-LABEL: define <2 x float> @test_vminnm_f32(<2 x float> %a, <2 x float> %b) #0 {
// CHECK: [[VMINNM_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float> %a, <2 x float> %b) #2
// CHECK: [[VMINNM_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float> %a, <2 x float> %b) #3
// CHECK: ret <2 x float> [[VMINNM_V2_I]]
float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) {
return vminnm_f32(a, b);
}
// CHECK-LABEL: define <4 x float> @test_vminnmq_f32(<4 x float> %a, <4 x float> %b) #0 {
// CHECK: [[VMINNMQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float> %a, <4 x float> %b) #2
// CHECK-LABEL: define <4 x float> @test_vminnmq_f32(<4 x float> %a, <4 x float> %b) #1 {
// CHECK: [[VMINNMQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float> %a, <4 x float> %b) #3
// CHECK: ret <4 x float> [[VMINNMQ_V2_I]]
float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) {
return vminnmq_f32(a, b);
}
// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"

View File

@ -3,113 +3,116 @@
#include <arm_neon.h>
// CHECK-LABEL: define <2 x i32> @test_vcvta_s32_f32(<2 x float> %a) #0 {
// CHECK: [[VCVTA_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float> %a) #2
// CHECK: [[VCVTA_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float> %a) #3
// CHECK: ret <2 x i32> [[VCVTA_S32_V1_I]]
int32x2_t test_vcvta_s32_f32(float32x2_t a) {
return vcvta_s32_f32(a);
}
// CHECK-LABEL: define <2 x i32> @test_vcvta_u32_f32(<2 x float> %a) #0 {
// CHECK: [[VCVTA_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float> %a) #2
// CHECK: [[VCVTA_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float> %a) #3
// CHECK: ret <2 x i32> [[VCVTA_U32_V1_I]]
uint32x2_t test_vcvta_u32_f32(float32x2_t a) {
return vcvta_u32_f32(a);
}
// CHECK-LABEL: define <4 x i32> @test_vcvtaq_s32_f32(<4 x float> %a) #0 {
// CHECK: [[VCVTAQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float> %a) #2
// CHECK-LABEL: define <4 x i32> @test_vcvtaq_s32_f32(<4 x float> %a) #1 {
// CHECK: [[VCVTAQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float> %a) #3
// CHECK: ret <4 x i32> [[VCVTAQ_S32_V1_I]]
int32x4_t test_vcvtaq_s32_f32(float32x4_t a) {
return vcvtaq_s32_f32(a);
}
// CHECK-LABEL: define <4 x i32> @test_vcvtaq_u32_f32(<4 x float> %a) #0 {
// CHECK: [[VCVTAQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float> %a) #2
// CHECK-LABEL: define <4 x i32> @test_vcvtaq_u32_f32(<4 x float> %a) #1 {
// CHECK: [[VCVTAQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float> %a) #3
// CHECK: ret <4 x i32> [[VCVTAQ_U32_V1_I]]
uint32x4_t test_vcvtaq_u32_f32(float32x4_t a) {
return vcvtaq_u32_f32(a);
}
// CHECK-LABEL: define <2 x i32> @test_vcvtn_s32_f32(<2 x float> %a) #0 {
// CHECK: [[VCVTN_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float> %a) #2
// CHECK: [[VCVTN_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float> %a) #3
// CHECK: ret <2 x i32> [[VCVTN_S32_V1_I]]
int32x2_t test_vcvtn_s32_f32(float32x2_t a) {
return vcvtn_s32_f32(a);
}
// CHECK-LABEL: define <2 x i32> @test_vcvtn_u32_f32(<2 x float> %a) #0 {
// CHECK: [[VCVTN_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float> %a) #2
// CHECK: [[VCVTN_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float> %a) #3
// CHECK: ret <2 x i32> [[VCVTN_U32_V1_I]]
uint32x2_t test_vcvtn_u32_f32(float32x2_t a) {
return vcvtn_u32_f32(a);
}
// CHECK-LABEL: define <4 x i32> @test_vcvtnq_s32_f32(<4 x float> %a) #0 {
// CHECK: [[VCVTNQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float> %a) #2
// CHECK-LABEL: define <4 x i32> @test_vcvtnq_s32_f32(<4 x float> %a) #1 {
// CHECK: [[VCVTNQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float> %a) #3
// CHECK: ret <4 x i32> [[VCVTNQ_S32_V1_I]]
int32x4_t test_vcvtnq_s32_f32(float32x4_t a) {
return vcvtnq_s32_f32(a);
}
// CHECK-LABEL: define <4 x i32> @test_vcvtnq_u32_f32(<4 x float> %a) #0 {
// CHECK: [[VCVTNQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float> %a) #2
// CHECK-LABEL: define <4 x i32> @test_vcvtnq_u32_f32(<4 x float> %a) #1 {
// CHECK: [[VCVTNQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float> %a) #3
// CHECK: ret <4 x i32> [[VCVTNQ_U32_V1_I]]
uint32x4_t test_vcvtnq_u32_f32(float32x4_t a) {
return vcvtnq_u32_f32(a);
}
// CHECK-LABEL: define <2 x i32> @test_vcvtp_s32_f32(<2 x float> %a) #0 {
// CHECK: [[VCVTP_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float> %a) #2
// CHECK: [[VCVTP_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float> %a) #3
// CHECK: ret <2 x i32> [[VCVTP_S32_V1_I]]
int32x2_t test_vcvtp_s32_f32(float32x2_t a) {
return vcvtp_s32_f32(a);
}
// CHECK-LABEL: define <2 x i32> @test_vcvtp_u32_f32(<2 x float> %a) #0 {
// CHECK: [[VCVTP_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float> %a) #2
// CHECK: [[VCVTP_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float> %a) #3
// CHECK: ret <2 x i32> [[VCVTP_U32_V1_I]]
uint32x2_t test_vcvtp_u32_f32(float32x2_t a) {
return vcvtp_u32_f32(a);
}
// CHECK-LABEL: define <4 x i32> @test_vcvtpq_s32_f32(<4 x float> %a) #0 {
// CHECK: [[VCVTPQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float> %a) #2
// CHECK-LABEL: define <4 x i32> @test_vcvtpq_s32_f32(<4 x float> %a) #1 {
// CHECK: [[VCVTPQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float> %a) #3
// CHECK: ret <4 x i32> [[VCVTPQ_S32_V1_I]]
int32x4_t test_vcvtpq_s32_f32(float32x4_t a) {
return vcvtpq_s32_f32(a);
}
// CHECK-LABEL: define <4 x i32> @test_vcvtpq_u32_f32(<4 x float> %a) #0 {
// CHECK: [[VCVTPQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float> %a) #2
// CHECK-LABEL: define <4 x i32> @test_vcvtpq_u32_f32(<4 x float> %a) #1 {
// CHECK: [[VCVTPQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float> %a) #3
// CHECK: ret <4 x i32> [[VCVTPQ_U32_V1_I]]
uint32x4_t test_vcvtpq_u32_f32(float32x4_t a) {
return vcvtpq_u32_f32(a);
}
// CHECK-LABEL: define <2 x i32> @test_vcvtm_s32_f32(<2 x float> %a) #0 {
// CHECK: [[VCVTM_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float> %a) #2
// CHECK: [[VCVTM_S32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float> %a) #3
// CHECK: ret <2 x i32> [[VCVTM_S32_V1_I]]
int32x2_t test_vcvtm_s32_f32(float32x2_t a) {
return vcvtm_s32_f32(a);
}
// CHECK-LABEL: define <2 x i32> @test_vcvtm_u32_f32(<2 x float> %a) #0 {
// CHECK: [[VCVTM_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float> %a) #2
// CHECK: [[VCVTM_U32_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float> %a) #3
// CHECK: ret <2 x i32> [[VCVTM_U32_V1_I]]
uint32x2_t test_vcvtm_u32_f32(float32x2_t a) {
return vcvtm_u32_f32(a);
}
// CHECK-LABEL: define <4 x i32> @test_vcvtmq_s32_f32(<4 x float> %a) #0 {
// CHECK: [[VCVTMQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float> %a) #2
// CHECK-LABEL: define <4 x i32> @test_vcvtmq_s32_f32(<4 x float> %a) #1 {
// CHECK: [[VCVTMQ_S32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float> %a) #3
// CHECK: ret <4 x i32> [[VCVTMQ_S32_V1_I]]
int32x4_t test_vcvtmq_s32_f32(float32x4_t a) {
return vcvtmq_s32_f32(a);
}
// CHECK-LABEL: define <4 x i32> @test_vcvtmq_u32_f32(<4 x float> %a) #0 {
// CHECK: [[VCVTMQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float> %a) #2
// CHECK-LABEL: define <4 x i32> @test_vcvtmq_u32_f32(<4 x float> %a) #1 {
// CHECK: [[VCVTMQ_U32_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float> %a) #3
// CHECK: ret <4 x i32> [[VCVTMQ_U32_V1_I]]
uint32x4_t test_vcvtmq_u32_f32(float32x4_t a) {
return vcvtmq_u32_f32(a);
}
// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"

View File

@ -44,7 +44,7 @@ float64x2_t test_vmovq_n_f64(float64_t w) {
return vmovq_n_f64(w);
}
// CHECK-LABEL: define <4 x half> @test_vmov_n_f16(half* %a1) #0 {
// CHECK-LABEL: define <4 x half> @test_vmov_n_f16(half* %a1) #1 {
// CHECK: [[TMP0:%.*]] = load half, half* %a1, align 2
// CHECK: [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[TMP0]], i32 0
// CHECK: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP0]], i32 1
@ -76,3 +76,5 @@ float16x8_t test_vmovq_n_f16(float16_t *a1) {
return vmovq_n_f16(*a1);
}
// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="128"
// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="64"

View File

@ -0,0 +1,61 @@
// RUN: %clang_cc1 -triple i686-linux-gnu -target-cpu i686 -emit-llvm %s -o - | FileCheck %s
typedef signed long long V2LLi __attribute__((vector_size(16)));
typedef signed long long V4LLi __attribute__((vector_size(32)));
V2LLi ret_128();
V4LLi ret_256();
void arg_128(V2LLi);
void arg_256(V4LLi);
// Make sure the return type forces a min-legal-vector-width
V2LLi foo(void) {
return (V2LLi){ 0, 0 };
}
V4LLi goo(void) {
return (V4LLi){ 0, 0, 0, 0 };
}
// Make sure the return type of a called function forces a min-legal-vector-width
void hoo(void) {
V2LLi tmp_V2LLi;
tmp_V2LLi = ret_128();
}
void joo(void) {
V4LLi tmp_V4LLi;
tmp_V4LLi = ret_256();
}
// Make sure the argument type of a called function forces a min-legal-vector-width
void koo(void) {
V2LLi tmp_V2LLi;
arg_128(tmp_V2LLi);
}
void loo(void) {
V4LLi tmp_V4LLi;
arg_256(tmp_V4LLi);
}
// Make sure the argument type of this function forces a min-legal-vector-width
void moo(V2LLi x) {
}
void noo(V4LLi x) {
}
// CHECK: {{.*}}@foo{{.*}} #0
// CHECK: {{.*}}@goo{{.*}} #1
// CHECK: {{.*}}@hoo{{.*}} #0
// CHECK: {{.*}}@joo{{.*}} #1
// CHECK: {{.*}}@koo{{.*}} #0
// CHECK: {{.*}}@loo{{.*}} #1
// CHECK: {{.*}}@moo{{.*}} #0
// CHECK: {{.*}}@noo{{.*}} #1
// CHECK: #0 = {{.*}}"min-legal-vector-width"="128"
// CHECK: #1 = {{.*}}"min-legal-vector-width"="256"
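// For reference, a minimal sketch of the IR shape these checks pin down
// (illustrative only, not part of the commit): attribute-group numbers are
// assigned by LLVM, and each group carries further target- and CPU-dependent
// attributes elided here as "...".
//
//   define <2 x i64> @foo() #0 {           ; 128-bit vector return type
//     ret <2 x i64> zeroinitializer
//   }
//   define void @noo(<4 x i64> %x) #1 {    ; 256-bit vector argument type
//     ret void
//   }
//   attributes #0 = { ... "min-legal-vector-width"="128" ... }
//   attributes #1 = { ... "min-legal-vector-width"="256" ... }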

View File

@ -16,7 +16,7 @@ float spscalardiv(float a, float b) {
float4 spvectordiv(float4 a, float4 b) {
// CHECK: @spvectordiv
// CHECK: #[[ATTR]]
// CHECK: #[[ATTR2:[0-9]+]]
// CHECK: fdiv{{.*}},
// NODIVOPT: !fpmath ![[MD]]
// DIVOPT-NOT: !fpmath ![[MD]]
@ -45,7 +45,11 @@ double dpscalardiv(double a, double b) {
#endif
// CHECK: attributes #[[ATTR]] = {
// NODIVOPT: "correctly-rounded-divide-sqrt-fp-math"="false"
// DIVOPT: "correctly-rounded-divide-sqrt-fp-math"="true"
// CHECK: }
// NODIVOPT-SAME: "correctly-rounded-divide-sqrt-fp-math"="false"
// DIVOPT-SAME: "correctly-rounded-divide-sqrt-fp-math"="true"
// CHECK-SAME: }
// CHECK: attributes #[[ATTR2]] = {
// NODIVOPT-SAME: "correctly-rounded-divide-sqrt-fp-math"="false"
// DIVOPT-SAME: "correctly-rounded-divide-sqrt-fp-math"="true"
// CHECK-SAME: }
// NODIVOPT: ![[MD]] = !{float 2.500000e+00}