|
|
|
@ -4486,29 +4486,29 @@ static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
|
|
|
|
|
switch (TypeFlags.getEltType()) {
|
|
|
|
|
case NeonTypeFlags::Int8:
|
|
|
|
|
case NeonTypeFlags::Poly8:
|
|
|
|
|
return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
|
|
|
|
|
return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
|
|
|
|
|
case NeonTypeFlags::Int16:
|
|
|
|
|
case NeonTypeFlags::Poly16:
|
|
|
|
|
return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
|
|
|
|
|
return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
|
|
|
|
|
case NeonTypeFlags::Float16:
|
|
|
|
|
if (HasLegalHalfType)
|
|
|
|
|
return llvm::VectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
|
|
|
|
|
return llvm::FixedVectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
|
|
|
|
|
else
|
|
|
|
|
return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
|
|
|
|
|
return llvm::FixedVectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
|
|
|
|
|
case NeonTypeFlags::Int32:
|
|
|
|
|
return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
|
|
|
|
|
return llvm::FixedVectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
|
|
|
|
|
case NeonTypeFlags::Int64:
|
|
|
|
|
case NeonTypeFlags::Poly64:
|
|
|
|
|
return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
|
|
|
|
|
return llvm::FixedVectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
|
|
|
|
|
case NeonTypeFlags::Poly128:
|
|
|
|
|
// FIXME: i128 and f128 are not fully supported in Clang and LLVM.
|
|
|
|
|
// There is a lot of i128 and f128 API missing.
|
|
|
|
|
// so we use v16i8 to represent poly128 and get pattern matched.
|
|
|
|
|
return llvm::VectorType::get(CGF->Int8Ty, 16);
|
|
|
|
|
return llvm::FixedVectorType::get(CGF->Int8Ty, 16);
|
|
|
|
|
case NeonTypeFlags::Float32:
|
|
|
|
|
return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
|
|
|
|
|
return llvm::FixedVectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
|
|
|
|
|
case NeonTypeFlags::Float64:
|
|
|
|
|
return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
|
|
|
|
|
return llvm::FixedVectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
|
|
|
|
|
}
|
|
|
|
|
llvm_unreachable("Unknown vector element type!");
|
|
|
|
|
}
|
|
|
|
@ -4518,11 +4518,11 @@ static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
|
|
|
|
|
int IsQuad = IntTypeFlags.isQuad();
|
|
|
|
|
switch (IntTypeFlags.getEltType()) {
|
|
|
|
|
case NeonTypeFlags::Int16:
|
|
|
|
|
return llvm::VectorType::get(CGF->HalfTy, (4 << IsQuad));
|
|
|
|
|
return llvm::FixedVectorType::get(CGF->HalfTy, (4 << IsQuad));
|
|
|
|
|
case NeonTypeFlags::Int32:
|
|
|
|
|
return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
|
|
|
|
|
return llvm::FixedVectorType::get(CGF->FloatTy, (2 << IsQuad));
|
|
|
|
|
case NeonTypeFlags::Int64:
|
|
|
|
|
return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
|
|
|
|
|
return llvm::FixedVectorType::get(CGF->DoubleTy, (1 << IsQuad));
|
|
|
|
|
default:
|
|
|
|
|
llvm_unreachable("Type can't be converted to floating-point!");
|
|
|
|
|
}
|
|
|
|
@ -5403,7 +5403,7 @@ Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
|
|
|
|
|
if (Modifier & AddRetType) {
|
|
|
|
|
llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
|
|
|
|
|
if (Modifier & VectorizeRetType)
|
|
|
|
|
Ty = llvm::VectorType::get(
|
|
|
|
|
Ty = llvm::FixedVectorType::get(
|
|
|
|
|
Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
|
|
|
|
|
|
|
|
|
|
Tys.push_back(Ty);
|
|
|
|
@ -5412,7 +5412,7 @@ Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
|
|
|
|
|
// Arguments.
|
|
|
|
|
if (Modifier & VectorizeArgTypes) {
|
|
|
|
|
int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
|
|
|
|
|
ArgType = llvm::VectorType::get(ArgType, Elts);
|
|
|
|
|
ArgType = llvm::FixedVectorType::get(ArgType, Elts);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (Modifier & (Add1ArgType | Add2ArgTypes))
|
|
|
|
@ -5586,7 +5586,7 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
|
|
|
|
|
Ty = HalfTy;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
llvm::Type *VecFlt = llvm::VectorType::get(Ty, VTy->getNumElements());
|
|
|
|
|
auto *VecFlt = llvm::FixedVectorType::get(Ty, VTy->getNumElements());
|
|
|
|
|
llvm::Type *Tys[] = { VTy, VecFlt };
|
|
|
|
|
Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
|
|
|
|
|
return EmitNeonCall(F, Ops, NameHint);
|
|
|
|
@ -5846,8 +5846,8 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
|
|
|
|
|
unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
|
|
|
|
|
llvm::Type *EltTy =
|
|
|
|
|
llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
|
|
|
|
|
llvm::Type *NarrowTy =
|
|
|
|
|
llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
|
|
|
|
|
auto *NarrowTy =
|
|
|
|
|
llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, NarrowTy };
|
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
|
|
|
|
|
}
|
|
|
|
@ -5856,8 +5856,8 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
|
|
|
|
|
// The source operand type has twice as many elements of half the size.
|
|
|
|
|
unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
|
|
|
|
|
llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
|
|
|
|
|
llvm::Type *NarrowTy =
|
|
|
|
|
llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
|
|
|
|
|
auto *NarrowTy =
|
|
|
|
|
llvm::FixedVectorType::get(EltTy, VTy->getNumElements() * 2);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, NarrowTy };
|
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
|
|
|
|
|
}
|
|
|
|
@ -5876,8 +5876,8 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
|
|
|
|
|
auto *RTy = cast<llvm::VectorType>(Ty);
|
|
|
|
|
if (BuiltinID == NEON::BI__builtin_neon_vqdmulhq_lane_v ||
|
|
|
|
|
BuiltinID == NEON::BI__builtin_neon_vqrdmulhq_lane_v)
|
|
|
|
|
RTy = llvm::VectorType::get(RTy->getElementType(),
|
|
|
|
|
RTy->getNumElements() * 2);
|
|
|
|
|
RTy = llvm::FixedVectorType::get(RTy->getElementType(),
|
|
|
|
|
RTy->getNumElements() * 2);
|
|
|
|
|
llvm::Type *Tys[2] = {
|
|
|
|
|
RTy, GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
|
|
|
|
|
/*isQuad*/ false))};
|
|
|
|
@ -6064,57 +6064,57 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
|
|
|
|
|
}
|
|
|
|
|
case NEON::BI__builtin_neon_vdot_v:
|
|
|
|
|
case NEON::BI__builtin_neon_vdotq_v: {
|
|
|
|
|
llvm::Type *InputTy =
|
|
|
|
|
llvm::VectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
|
|
|
|
|
auto *InputTy =
|
|
|
|
|
llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, InputTy };
|
|
|
|
|
Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
|
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
|
|
|
|
|
}
|
|
|
|
|
case NEON::BI__builtin_neon_vfmlal_low_v:
|
|
|
|
|
case NEON::BI__builtin_neon_vfmlalq_low_v: {
|
|
|
|
|
llvm::Type *InputTy =
|
|
|
|
|
llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
|
|
|
|
|
auto *InputTy =
|
|
|
|
|
llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, InputTy };
|
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
|
|
|
|
|
}
|
|
|
|
|
case NEON::BI__builtin_neon_vfmlsl_low_v:
|
|
|
|
|
case NEON::BI__builtin_neon_vfmlslq_low_v: {
|
|
|
|
|
llvm::Type *InputTy =
|
|
|
|
|
llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
|
|
|
|
|
auto *InputTy =
|
|
|
|
|
llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, InputTy };
|
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
|
|
|
|
|
}
|
|
|
|
|
case NEON::BI__builtin_neon_vfmlal_high_v:
|
|
|
|
|
case NEON::BI__builtin_neon_vfmlalq_high_v: {
|
|
|
|
|
llvm::Type *InputTy =
|
|
|
|
|
llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
|
|
|
|
|
auto *InputTy =
|
|
|
|
|
llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, InputTy };
|
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
|
|
|
|
|
}
|
|
|
|
|
case NEON::BI__builtin_neon_vfmlsl_high_v:
|
|
|
|
|
case NEON::BI__builtin_neon_vfmlslq_high_v: {
|
|
|
|
|
llvm::Type *InputTy =
|
|
|
|
|
llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
|
|
|
|
|
auto *InputTy =
|
|
|
|
|
llvm::FixedVectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, InputTy };
|
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
|
|
|
|
|
}
|
|
|
|
|
case NEON::BI__builtin_neon_vmmlaq_v: {
|
|
|
|
|
llvm::Type *InputTy =
|
|
|
|
|
llvm::VectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
|
|
|
|
|
auto *InputTy =
|
|
|
|
|
llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, InputTy };
|
|
|
|
|
Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
|
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmmla");
|
|
|
|
|
}
|
|
|
|
|
case NEON::BI__builtin_neon_vusmmlaq_v: {
|
|
|
|
|
llvm::Type *InputTy =
|
|
|
|
|
llvm::VectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
|
|
|
|
|
auto *InputTy =
|
|
|
|
|
llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, InputTy };
|
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusmmla");
|
|
|
|
|
}
|
|
|
|
|
case NEON::BI__builtin_neon_vusdot_v:
|
|
|
|
|
case NEON::BI__builtin_neon_vusdotq_v: {
|
|
|
|
|
llvm::Type *InputTy =
|
|
|
|
|
llvm::VectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
|
|
|
|
|
auto *InputTy =
|
|
|
|
|
llvm::FixedVectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, InputTy };
|
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vusdot");
|
|
|
|
|
}
|
|
|
|
@ -7003,7 +7003,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
|
|
|
|
|
Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
|
|
|
|
|
// Load the value as a one-element vector.
|
|
|
|
|
Ty = llvm::VectorType::get(VTy->getElementType(), 1);
|
|
|
|
|
Ty = llvm::FixedVectorType::get(VTy->getElementType(), 1);
|
|
|
|
|
llvm::Type *Tys[] = {Ty, Int8PtrTy};
|
|
|
|
|
Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
|
|
|
|
|
Value *Align = getAlignmentValue32(PtrOp0);
|
|
|
|
@ -7497,7 +7497,7 @@ static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
|
|
|
|
|
llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
|
|
|
|
|
auto *VTy = llvm::FixedVectorType::get(Int16Ty, 4);
|
|
|
|
|
Op = Builder.CreateBitCast(Op, Int16Ty);
|
|
|
|
|
Value *V = UndefValue::get(VTy);
|
|
|
|
|
llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
|
|
|
|
@ -8867,7 +8867,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
|
|
|
|
|
}
|
|
|
|
|
case NEON::BI__builtin_neon_vpaddd_s64: {
|
|
|
|
|
llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
|
|
|
|
|
auto *Ty = llvm::FixedVectorType::get(Int64Ty, 2);
|
|
|
|
|
Value *Vec = EmitScalarExpr(E->getArg(0));
|
|
|
|
|
// The vector is v2i64, so make sure it's bitcast to that.
|
|
|
|
|
Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
|
|
|
|
@ -8879,8 +8879,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
return Builder.CreateAdd(Op0, Op1, "vpaddd");
|
|
|
|
|
}
|
|
|
|
|
case NEON::BI__builtin_neon_vpaddd_f64: {
|
|
|
|
|
llvm::Type *Ty =
|
|
|
|
|
llvm::VectorType::get(DoubleTy, 2);
|
|
|
|
|
auto *Ty = llvm::FixedVectorType::get(DoubleTy, 2);
|
|
|
|
|
Value *Vec = EmitScalarExpr(E->getArg(0));
|
|
|
|
|
// The vector is v2f64, so make sure it's bitcast to that.
|
|
|
|
|
Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
|
|
|
|
@ -8892,8 +8891,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
return Builder.CreateFAdd(Op0, Op1, "vpaddd");
|
|
|
|
|
}
|
|
|
|
|
case NEON::BI__builtin_neon_vpadds_f32: {
|
|
|
|
|
llvm::Type *Ty =
|
|
|
|
|
llvm::VectorType::get(FloatTy, 2);
|
|
|
|
|
auto *Ty = llvm::FixedVectorType::get(FloatTy, 2);
|
|
|
|
|
Value *Vec = EmitScalarExpr(E->getArg(0));
|
|
|
|
|
// The vector is v2f32, so make sure it's bitcast to that.
|
|
|
|
|
Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
|
|
|
|
@ -9066,87 +9064,95 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
|
|
|
|
|
case NEON::BI__builtin_neon_vset_lane_f64:
|
|
|
|
|
// The vector type needs a cast for the v1f64 variant.
|
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1],
|
|
|
|
|
llvm::VectorType::get(DoubleTy, 1));
|
|
|
|
|
Ops[1] =
|
|
|
|
|
Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 1));
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(2)));
|
|
|
|
|
return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
|
|
|
|
|
case NEON::BI__builtin_neon_vsetq_lane_f64:
|
|
|
|
|
// The vector type needs a cast for the v2f64 variant.
|
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1],
|
|
|
|
|
llvm::VectorType::get(DoubleTy, 2));
|
|
|
|
|
Ops[1] =
|
|
|
|
|
Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(DoubleTy, 2));
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(2)));
|
|
|
|
|
return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
|
|
|
|
|
|
|
|
|
|
case NEON::BI__builtin_neon_vget_lane_i8:
|
|
|
|
|
case NEON::BI__builtin_neon_vdupb_lane_i8:
|
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
|
|
|
|
|
Ops[0] =
|
|
|
|
|
Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 8));
|
|
|
|
|
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
|
|
|
|
|
"vget_lane");
|
|
|
|
|
case NEON::BI__builtin_neon_vgetq_lane_i8:
|
|
|
|
|
case NEON::BI__builtin_neon_vdupb_laneq_i8:
|
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
|
|
|
|
|
Ops[0] =
|
|
|
|
|
Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int8Ty, 16));
|
|
|
|
|
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
|
|
|
|
|
"vgetq_lane");
|
|
|
|
|
case NEON::BI__builtin_neon_vget_lane_i16:
|
|
|
|
|
case NEON::BI__builtin_neon_vduph_lane_i16:
|
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
|
|
|
|
|
Ops[0] =
|
|
|
|
|
Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 4));
|
|
|
|
|
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
|
|
|
|
|
"vget_lane");
|
|
|
|
|
case NEON::BI__builtin_neon_vgetq_lane_i16:
|
|
|
|
|
case NEON::BI__builtin_neon_vduph_laneq_i16:
|
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
|
|
|
|
|
Ops[0] =
|
|
|
|
|
Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int16Ty, 8));
|
|
|
|
|
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
|
|
|
|
|
"vgetq_lane");
|
|
|
|
|
case NEON::BI__builtin_neon_vget_lane_i32:
|
|
|
|
|
case NEON::BI__builtin_neon_vdups_lane_i32:
|
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
|
|
|
|
|
Ops[0] =
|
|
|
|
|
Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 2));
|
|
|
|
|
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
|
|
|
|
|
"vget_lane");
|
|
|
|
|
case NEON::BI__builtin_neon_vdups_lane_f32:
|
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0],
|
|
|
|
|
llvm::VectorType::get(FloatTy, 2));
|
|
|
|
|
Ops[0] =
|
|
|
|
|
Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
|
|
|
|
|
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
|
|
|
|
|
"vdups_lane");
|
|
|
|
|
case NEON::BI__builtin_neon_vgetq_lane_i32:
|
|
|
|
|
case NEON::BI__builtin_neon_vdups_laneq_i32:
|
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
|
|
|
|
|
Ops[0] =
|
|
|
|
|
Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
|
|
|
|
|
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
|
|
|
|
|
"vgetq_lane");
|
|
|
|
|
case NEON::BI__builtin_neon_vget_lane_i64:
|
|
|
|
|
case NEON::BI__builtin_neon_vdupd_lane_i64:
|
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
|
|
|
|
|
Ops[0] =
|
|
|
|
|
Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 1));
|
|
|
|
|
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
|
|
|
|
|
"vget_lane");
|
|
|
|
|
case NEON::BI__builtin_neon_vdupd_lane_f64:
|
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0],
|
|
|
|
|
llvm::VectorType::get(DoubleTy, 1));
|
|
|
|
|
Ops[0] =
|
|
|
|
|
Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
|
|
|
|
|
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
|
|
|
|
|
"vdupd_lane");
|
|
|
|
|
case NEON::BI__builtin_neon_vgetq_lane_i64:
|
|
|
|
|
case NEON::BI__builtin_neon_vdupd_laneq_i64:
|
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
|
|
|
|
|
Ops[0] =
|
|
|
|
|
Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
|
|
|
|
|
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
|
|
|
|
|
"vgetq_lane");
|
|
|
|
|
case NEON::BI__builtin_neon_vget_lane_f32:
|
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0],
|
|
|
|
|
llvm::VectorType::get(FloatTy, 2));
|
|
|
|
|
Ops[0] =
|
|
|
|
|
Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 2));
|
|
|
|
|
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
|
|
|
|
|
"vget_lane");
|
|
|
|
|
case NEON::BI__builtin_neon_vget_lane_f64:
|
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0],
|
|
|
|
|
llvm::VectorType::get(DoubleTy, 1));
|
|
|
|
|
Ops[0] =
|
|
|
|
|
Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 1));
|
|
|
|
|
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
|
|
|
|
|
"vget_lane");
|
|
|
|
|
case NEON::BI__builtin_neon_vgetq_lane_f32:
|
|
|
|
|
case NEON::BI__builtin_neon_vdups_laneq_f32:
|
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0],
|
|
|
|
|
llvm::VectorType::get(FloatTy, 4));
|
|
|
|
|
Ops[0] =
|
|
|
|
|
Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(FloatTy, 4));
|
|
|
|
|
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
|
|
|
|
|
"vgetq_lane");
|
|
|
|
|
case NEON::BI__builtin_neon_vgetq_lane_f64:
|
|
|
|
|
case NEON::BI__builtin_neon_vdupd_laneq_f64:
|
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0],
|
|
|
|
|
llvm::VectorType::get(DoubleTy, 2));
|
|
|
|
|
Ops[0] =
|
|
|
|
|
Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(DoubleTy, 2));
|
|
|
|
|
return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
|
|
|
|
|
"vgetq_lane");
|
|
|
|
|
case NEON::BI__builtin_neon_vaddh_f16:
|
|
|
|
@ -9187,7 +9193,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
SmallVector<Value *, 2> ProductOps;
|
|
|
|
|
ProductOps.push_back(vectorWrapScalar16(Ops[1]));
|
|
|
|
|
ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
|
|
|
|
|
llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
|
|
|
|
|
auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
|
|
|
|
|
Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
|
|
|
|
|
ProductOps, "vqdmlXl");
|
|
|
|
|
Constant *CI = ConstantInt::get(SizeTy, 0);
|
|
|
|
@ -9284,7 +9290,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
SmallVector<Value *, 2> ProductOps;
|
|
|
|
|
ProductOps.push_back(vectorWrapScalar16(Ops[1]));
|
|
|
|
|
ProductOps.push_back(vectorWrapScalar16(Ops[2]));
|
|
|
|
|
llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
|
|
|
|
|
auto *VTy = llvm::FixedVectorType::get(Int32Ty, 4);
|
|
|
|
|
Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
|
|
|
|
|
ProductOps, "vqdmlXl");
|
|
|
|
|
Constant *CI = ConstantInt::get(SizeTy, 0);
|
|
|
|
@ -9532,9 +9538,10 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
Ops[2] = Addend;
|
|
|
|
|
|
|
|
|
|
// Now adjust things to handle the lane access.
|
|
|
|
|
llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
|
|
|
|
|
llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
|
|
|
|
|
VTy;
|
|
|
|
|
auto *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v
|
|
|
|
|
? llvm::FixedVectorType::get(VTy->getElementType(),
|
|
|
|
|
VTy->getNumElements() / 2)
|
|
|
|
|
: VTy;
|
|
|
|
|
llvm::Constant *cst = cast<Constant>(Ops[3]);
|
|
|
|
|
Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(), cst);
|
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
|
|
|
|
@ -9564,8 +9571,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
|
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
|
|
|
|
|
|
|
|
|
|
llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
|
|
|
|
|
VTy->getNumElements() * 2);
|
|
|
|
|
auto *STy = llvm::FixedVectorType::get(VTy->getElementType(),
|
|
|
|
|
VTy->getNumElements() * 2);
|
|
|
|
|
Ops[2] = Builder.CreateBitCast(Ops[2], STy);
|
|
|
|
|
Value *SV = llvm::ConstantVector::getSplat(VTy->getElementCount(),
|
|
|
|
|
cast<ConstantInt>(Ops[3]));
|
|
|
|
@ -9636,8 +9643,8 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
unsigned ArgElts = VTy->getNumElements();
|
|
|
|
|
llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
|
|
|
|
|
unsigned BitWidth = EltTy->getBitWidth();
|
|
|
|
|
llvm::Type *ArgTy = llvm::VectorType::get(
|
|
|
|
|
llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
|
|
|
|
|
auto *ArgTy = llvm::FixedVectorType::get(
|
|
|
|
|
llvm::IntegerType::get(getLLVMContext(), BitWidth / 2), 2 * ArgElts);
|
|
|
|
|
llvm::Type* Tys[2] = { VTy, ArgTy };
|
|
|
|
|
Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
|
|
|
|
|
SmallVector<llvm::Value*, 1> TmpOps;
|
|
|
|
@ -9968,7 +9975,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vaddv_s8: {
|
|
|
|
|
Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
|
|
|
|
|
Ty = Int32Ty;
|
|
|
|
|
VTy = llvm::VectorType::get(Int8Ty, 8);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(Int8Ty, 8);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
|
|
|
|
@ -9980,7 +9987,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vaddv_s16: {
|
|
|
|
|
Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
|
|
|
|
|
Ty = Int32Ty;
|
|
|
|
|
VTy = llvm::VectorType::get(Int16Ty, 4);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(Int16Ty, 4);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
|
|
|
|
@ -9992,7 +9999,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vaddvq_s8: {
|
|
|
|
|
Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
|
|
|
|
|
Ty = Int32Ty;
|
|
|
|
|
VTy = llvm::VectorType::get(Int8Ty, 16);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(Int8Ty, 16);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
|
|
|
|
@ -10004,7 +10011,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vaddvq_s16: {
|
|
|
|
|
Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
|
|
|
|
|
Ty = Int32Ty;
|
|
|
|
|
VTy = llvm::VectorType::get(Int16Ty, 8);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(Int16Ty, 8);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
|
|
|
|
@ -10013,7 +10020,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vmaxv_u8: {
|
|
|
|
|
Int = Intrinsic::aarch64_neon_umaxv;
|
|
|
|
|
Ty = Int32Ty;
|
|
|
|
|
VTy = llvm::VectorType::get(Int8Ty, 8);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(Int8Ty, 8);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
|
|
|
|
@ -10022,7 +10029,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vmaxv_u16: {
|
|
|
|
|
Int = Intrinsic::aarch64_neon_umaxv;
|
|
|
|
|
Ty = Int32Ty;
|
|
|
|
|
VTy = llvm::VectorType::get(Int16Ty, 4);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(Int16Ty, 4);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
|
|
|
|
@ -10031,7 +10038,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vmaxvq_u8: {
|
|
|
|
|
Int = Intrinsic::aarch64_neon_umaxv;
|
|
|
|
|
Ty = Int32Ty;
|
|
|
|
|
VTy = llvm::VectorType::get(Int8Ty, 16);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(Int8Ty, 16);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
|
|
|
|
@ -10040,7 +10047,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vmaxvq_u16: {
|
|
|
|
|
Int = Intrinsic::aarch64_neon_umaxv;
|
|
|
|
|
Ty = Int32Ty;
|
|
|
|
|
VTy = llvm::VectorType::get(Int16Ty, 8);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(Int16Ty, 8);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
|
|
|
|
@ -10049,7 +10056,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vmaxv_s8: {
|
|
|
|
|
Int = Intrinsic::aarch64_neon_smaxv;
|
|
|
|
|
Ty = Int32Ty;
|
|
|
|
|
VTy = llvm::VectorType::get(Int8Ty, 8);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(Int8Ty, 8);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
|
|
|
|
@ -10058,7 +10065,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vmaxv_s16: {
|
|
|
|
|
Int = Intrinsic::aarch64_neon_smaxv;
|
|
|
|
|
Ty = Int32Ty;
|
|
|
|
|
VTy = llvm::VectorType::get(Int16Ty, 4);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(Int16Ty, 4);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
|
|
|
|
@ -10067,7 +10074,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vmaxvq_s8: {
|
|
|
|
|
Int = Intrinsic::aarch64_neon_smaxv;
|
|
|
|
|
Ty = Int32Ty;
|
|
|
|
|
VTy = llvm::VectorType::get(Int8Ty, 16);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(Int8Ty, 16);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
|
|
|
|
@ -10076,7 +10083,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vmaxvq_s16: {
|
|
|
|
|
Int = Intrinsic::aarch64_neon_smaxv;
|
|
|
|
|
Ty = Int32Ty;
|
|
|
|
|
VTy = llvm::VectorType::get(Int16Ty, 8);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(Int16Ty, 8);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
|
|
|
|
@ -10085,7 +10092,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vmaxv_f16: {
|
|
|
|
|
Int = Intrinsic::aarch64_neon_fmaxv;
|
|
|
|
|
Ty = HalfTy;
|
|
|
|
|
VTy = llvm::VectorType::get(HalfTy, 4);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(HalfTy, 4);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
|
|
|
|
@ -10094,7 +10101,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vmaxvq_f16: {
|
|
|
|
|
Int = Intrinsic::aarch64_neon_fmaxv;
|
|
|
|
|
Ty = HalfTy;
|
|
|
|
|
VTy = llvm::VectorType::get(HalfTy, 8);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(HalfTy, 8);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
|
|
|
|
@ -10103,7 +10110,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vminv_u8: {
|
|
|
|
|
Int = Intrinsic::aarch64_neon_uminv;
|
|
|
|
|
Ty = Int32Ty;
|
|
|
|
|
VTy = llvm::VectorType::get(Int8Ty, 8);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(Int8Ty, 8);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
|
|
|
|
@ -10112,7 +10119,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vminv_u16: {
|
|
|
|
|
Int = Intrinsic::aarch64_neon_uminv;
|
|
|
|
|
Ty = Int32Ty;
|
|
|
|
|
VTy = llvm::VectorType::get(Int16Ty, 4);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(Int16Ty, 4);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
|
|
|
|
@ -10121,7 +10128,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vminvq_u8: {
|
|
|
|
|
Int = Intrinsic::aarch64_neon_uminv;
|
|
|
|
|
Ty = Int32Ty;
|
|
|
|
|
VTy = llvm::VectorType::get(Int8Ty, 16);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(Int8Ty, 16);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
|
|
|
|
@ -10130,7 +10137,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vminvq_u16: {
|
|
|
|
|
Int = Intrinsic::aarch64_neon_uminv;
|
|
|
|
|
Ty = Int32Ty;
|
|
|
|
|
VTy = llvm::VectorType::get(Int16Ty, 8);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(Int16Ty, 8);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
|
|
|
|
@ -10139,7 +10146,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vminv_s8: {
|
|
|
|
|
Int = Intrinsic::aarch64_neon_sminv;
|
|
|
|
|
Ty = Int32Ty;
|
|
|
|
|
VTy = llvm::VectorType::get(Int8Ty, 8);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(Int8Ty, 8);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
|
|
|
|
@ -10148,7 +10155,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vminv_s16: {
|
|
|
|
|
Int = Intrinsic::aarch64_neon_sminv;
|
|
|
|
|
Ty = Int32Ty;
|
|
|
|
|
VTy = llvm::VectorType::get(Int16Ty, 4);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(Int16Ty, 4);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
|
|
|
|
@ -10157,7 +10164,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vminvq_s8: {
|
|
|
|
|
Int = Intrinsic::aarch64_neon_sminv;
|
|
|
|
|
Ty = Int32Ty;
|
|
|
|
|
VTy = llvm::VectorType::get(Int8Ty, 16);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(Int8Ty, 16);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
|
|
|
|
@ -10166,7 +10173,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vminvq_s16: {
|
|
|
|
|
Int = Intrinsic::aarch64_neon_sminv;
|
|
|
|
|
Ty = Int32Ty;
|
|
|
|
|
VTy = llvm::VectorType::get(Int16Ty, 8);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(Int16Ty, 8);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
|
|
|
|
@ -10175,7 +10182,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vminv_f16: {
|
|
|
|
|
Int = Intrinsic::aarch64_neon_fminv;
|
|
|
|
|
Ty = HalfTy;
|
|
|
|
|
VTy = llvm::VectorType::get(HalfTy, 4);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(HalfTy, 4);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
|
|
|
|
@ -10184,7 +10191,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vminvq_f16: {
|
|
|
|
|
Int = Intrinsic::aarch64_neon_fminv;
|
|
|
|
|
Ty = HalfTy;
|
|
|
|
|
VTy = llvm::VectorType::get(HalfTy, 8);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(HalfTy, 8);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
|
|
|
|
@ -10193,7 +10200,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vmaxnmv_f16: {
|
|
|
|
|
Int = Intrinsic::aarch64_neon_fmaxnmv;
|
|
|
|
|
Ty = HalfTy;
|
|
|
|
|
VTy = llvm::VectorType::get(HalfTy, 4);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(HalfTy, 4);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
|
|
|
|
@ -10202,7 +10209,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vmaxnmvq_f16: {
|
|
|
|
|
Int = Intrinsic::aarch64_neon_fmaxnmv;
|
|
|
|
|
Ty = HalfTy;
|
|
|
|
|
VTy = llvm::VectorType::get(HalfTy, 8);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(HalfTy, 8);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
|
|
|
|
@ -10211,7 +10218,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vminnmv_f16: {
|
|
|
|
|
Int = Intrinsic::aarch64_neon_fminnmv;
|
|
|
|
|
Ty = HalfTy;
|
|
|
|
|
VTy = llvm::VectorType::get(HalfTy, 4);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(HalfTy, 4);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
|
|
|
|
@ -10220,7 +10227,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vminnmvq_f16: {
|
|
|
|
|
Int = Intrinsic::aarch64_neon_fminnmv;
|
|
|
|
|
Ty = HalfTy;
|
|
|
|
|
VTy = llvm::VectorType::get(HalfTy, 8);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(HalfTy, 8);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
|
|
|
|
@ -10234,7 +10241,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vaddlv_u8: {
|
|
|
|
|
Int = Intrinsic::aarch64_neon_uaddlv;
|
|
|
|
|
Ty = Int32Ty;
|
|
|
|
|
VTy = llvm::VectorType::get(Int8Ty, 8);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(Int8Ty, 8);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
|
|
|
|
@ -10243,7 +10250,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vaddlv_u16: {
|
|
|
|
|
Int = Intrinsic::aarch64_neon_uaddlv;
|
|
|
|
|
Ty = Int32Ty;
|
|
|
|
|
VTy = llvm::VectorType::get(Int16Ty, 4);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(Int16Ty, 4);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
|
|
|
|
@ -10251,7 +10258,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vaddlvq_u8: {
|
|
|
|
|
Int = Intrinsic::aarch64_neon_uaddlv;
|
|
|
|
|
Ty = Int32Ty;
|
|
|
|
|
VTy = llvm::VectorType::get(Int8Ty, 16);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(Int8Ty, 16);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
|
|
|
|
@ -10260,7 +10267,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vaddlvq_u16: {
|
|
|
|
|
Int = Intrinsic::aarch64_neon_uaddlv;
|
|
|
|
|
Ty = Int32Ty;
|
|
|
|
|
VTy = llvm::VectorType::get(Int16Ty, 8);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(Int16Ty, 8);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
|
|
|
|
@ -10268,7 +10275,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vaddlv_s8: {
|
|
|
|
|
Int = Intrinsic::aarch64_neon_saddlv;
|
|
|
|
|
Ty = Int32Ty;
|
|
|
|
|
VTy = llvm::VectorType::get(Int8Ty, 8);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(Int8Ty, 8);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
|
|
|
|
@ -10277,7 +10284,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vaddlv_s16: {
|
|
|
|
|
Int = Intrinsic::aarch64_neon_saddlv;
|
|
|
|
|
Ty = Int32Ty;
|
|
|
|
|
VTy = llvm::VectorType::get(Int16Ty, 4);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(Int16Ty, 4);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
|
|
|
|
@ -10285,7 +10292,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vaddlvq_s8: {
|
|
|
|
|
Int = Intrinsic::aarch64_neon_saddlv;
|
|
|
|
|
Ty = Int32Ty;
|
|
|
|
|
VTy = llvm::VectorType::get(Int8Ty, 16);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(Int8Ty, 16);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
|
|
|
|
@ -10294,7 +10301,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case NEON::BI__builtin_neon_vaddlvq_s16: {
|
|
|
|
|
Int = Intrinsic::aarch64_neon_saddlv;
|
|
|
|
|
Ty = Int32Ty;
|
|
|
|
|
VTy = llvm::VectorType::get(Int16Ty, 8);
|
|
|
|
|
VTy = llvm::FixedVectorType::get(Int16Ty, 8);
|
|
|
|
|
llvm::Type *Tys[2] = { Ty, VTy };
|
|
|
|
|
Ops.push_back(EmitScalarExpr(E->getArg(0)));
|
|
|
|
|
return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
|
|
|
|
@ -10754,8 +10761,8 @@ BuildVector(ArrayRef<llvm::Value*> Ops) {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Otherwise, insertelement the values to build the vector.
|
|
|
|
|
Value *Result =
|
|
|
|
|
llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
|
|
|
|
|
Value *Result = llvm::UndefValue::get(
|
|
|
|
|
llvm::FixedVectorType::get(Ops[0]->getType(), Ops.size()));
|
|
|
|
|
|
|
|
|
|
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
|
|
|
|
|
Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
|
|
|
|
@ -10767,8 +10774,9 @@ BuildVector(ArrayRef<llvm::Value*> Ops) {
|
|
|
|
|
static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
|
|
|
|
|
unsigned NumElts) {
|
|
|
|
|
|
|
|
|
|
llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(),
|
|
|
|
|
cast<IntegerType>(Mask->getType())->getBitWidth());
|
|
|
|
|
auto *MaskTy = llvm::FixedVectorType::get(
|
|
|
|
|
CGF.Builder.getInt1Ty(),
|
|
|
|
|
cast<IntegerType>(Mask->getType())->getBitWidth());
|
|
|
|
|
Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
|
|
|
|
|
|
|
|
|
|
// If we have less than 8 elements, then the starting mask was an i8 and
|
|
|
|
@ -10946,9 +10954,8 @@ static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
|
|
|
|
|
if (C->isAllOnesValue())
|
|
|
|
|
return Op0;
|
|
|
|
|
|
|
|
|
|
llvm::VectorType *MaskTy =
|
|
|
|
|
llvm::VectorType::get(CGF.Builder.getInt1Ty(),
|
|
|
|
|
Mask->getType()->getIntegerBitWidth());
|
|
|
|
|
auto *MaskTy = llvm::FixedVectorType::get(
|
|
|
|
|
CGF.Builder.getInt1Ty(), Mask->getType()->getIntegerBitWidth());
|
|
|
|
|
Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
|
|
|
|
|
Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
|
|
|
|
|
return CGF.Builder.CreateSelect(Mask, Op0, Op1);
|
|
|
|
@ -10987,10 +10994,10 @@ static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
|
|
|
|
|
|
|
|
|
|
if (CC == 3) {
|
|
|
|
|
Cmp = Constant::getNullValue(
|
|
|
|
|
llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
|
|
|
|
|
llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
|
|
|
|
|
} else if (CC == 7) {
|
|
|
|
|
Cmp = Constant::getAllOnesValue(
|
|
|
|
|
llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
|
|
|
|
|
llvm::FixedVectorType::get(CGF.Builder.getInt1Ty(), NumElts));
|
|
|
|
|
} else {
|
|
|
|
|
ICmpInst::Predicate Pred;
|
|
|
|
|
switch (CC) {
|
|
|
|
@ -11204,8 +11211,8 @@ static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
|
|
|
|
|
ArrayRef<Value *> Ops) {
|
|
|
|
|
llvm::Type *Ty = Ops[0]->getType();
|
|
|
|
|
// Arguments have a vXi32 type so cast to vXi64.
|
|
|
|
|
Ty = llvm::VectorType::get(CGF.Int64Ty,
|
|
|
|
|
Ty->getPrimitiveSizeInBits() / 64);
|
|
|
|
|
Ty = llvm::FixedVectorType::get(CGF.Int64Ty,
|
|
|
|
|
Ty->getPrimitiveSizeInBits() / 64);
|
|
|
|
|
Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
|
|
|
|
|
Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
|
|
|
|
|
|
|
|
|
@ -11306,7 +11313,7 @@ static Value *EmitX86CvtF16ToFloatExpr(CodeGenFunction &CGF,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Bitcast from vXi16 to vXf16.
|
|
|
|
|
llvm::Type *HalfTy = llvm::VectorType::get(
|
|
|
|
|
auto *HalfTy = llvm::FixedVectorType::get(
|
|
|
|
|
llvm::Type::getHalfTy(CGF.getLLVMContext()), NumDstElts);
|
|
|
|
|
Src = CGF.Builder.CreateBitCast(Src, HalfTy);
|
|
|
|
|
|
|
|
|
@ -12571,7 +12578,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, NumElts);
|
|
|
|
|
auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
|
|
|
|
|
Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
|
|
|
|
|
Value *Zero = llvm::Constant::getNullValue(VecTy);
|
|
|
|
|
Value *SV = Builder.CreateShuffleVector(Zero, Cast,
|
|
|
|
@ -12601,7 +12608,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, NumElts);
|
|
|
|
|
auto *VecTy = llvm::FixedVectorType::get(Int8Ty, NumElts);
|
|
|
|
|
Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
|
|
|
|
|
Value *Zero = llvm::Constant::getNullValue(VecTy);
|
|
|
|
|
Value *SV = Builder.CreateShuffleVector(Cast, Zero,
|
|
|
|
@ -14079,11 +14086,13 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
|
|
|
|
|
// Need to cast the second argument from a vector of unsigned int to a
|
|
|
|
|
// vector of long long.
|
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
|
|
|
|
|
Ops[1] =
|
|
|
|
|
Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(Int64Ty, 2));
|
|
|
|
|
|
|
|
|
|
if (getTarget().isLittleEndian()) {
|
|
|
|
|
// Reverse the double words in the vector we will extract from.
|
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
|
|
|
|
|
Ops[0] =
|
|
|
|
|
Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
|
|
|
|
|
Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ArrayRef<int>{1, 0});
|
|
|
|
|
|
|
|
|
|
// Reverse the index.
|
|
|
|
@ -14091,7 +14100,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Intrinsic expects the first arg to be a vector of int.
|
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
|
|
|
|
|
Ops[0] =
|
|
|
|
|
Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
|
|
|
|
|
Ops[2] = ConstantInt::getSigned(Int32Ty, Index);
|
|
|
|
|
return Builder.CreateCall(F, Ops);
|
|
|
|
|
}
|
|
|
|
@ -14100,7 +14110,8 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
|
|
|
|
|
|
|
|
|
|
// Intrinsic expects the first argument to be a vector of doublewords.
|
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
|
|
|
|
|
Ops[0] =
|
|
|
|
|
Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
|
|
|
|
|
|
|
|
|
|
// The second argument is a compile time constant int that needs to
|
|
|
|
|
// be clamped to the range [0, 12].
|
|
|
|
@ -14132,8 +14143,10 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
assert(ArgCI && "Third arg must be constant integer!");
|
|
|
|
|
|
|
|
|
|
unsigned Index = ArgCI->getZExtValue();
|
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
|
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
|
|
|
|
|
Ops[0] =
|
|
|
|
|
Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
|
|
|
|
|
Ops[1] =
|
|
|
|
|
Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(Int64Ty, 2));
|
|
|
|
|
|
|
|
|
|
// Account for endianness by treating this as just a shuffle. So we use the
|
|
|
|
|
// same indices for both LE and BE in order to produce expected results in
|
|
|
|
@ -14153,8 +14166,10 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
|
|
|
|
|
assert(ArgCI && "Third argument must be a compile time constant");
|
|
|
|
|
unsigned Index = ArgCI->getZExtValue() & 0x3;
|
|
|
|
|
Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
|
|
|
|
|
Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4));
|
|
|
|
|
Ops[0] =
|
|
|
|
|
Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
|
|
|
|
|
Ops[1] =
|
|
|
|
|
Builder.CreateBitCast(Ops[1], llvm::FixedVectorType::get(Int32Ty, 4));
|
|
|
|
|
|
|
|
|
|
// Create a shuffle mask
|
|
|
|
|
int ElemIdx0;
|
|
|
|
@ -14188,7 +14203,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case PPC::BI__builtin_pack_vector_int128: {
|
|
|
|
|
bool isLittleEndian = getTarget().isLittleEndian();
|
|
|
|
|
Value *UndefValue =
|
|
|
|
|
llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), 2));
|
|
|
|
|
llvm::UndefValue::get(llvm::FixedVectorType::get(Ops[0]->getType(), 2));
|
|
|
|
|
Value *Res = Builder.CreateInsertElement(
|
|
|
|
|
UndefValue, Ops[0], (uint64_t)(isLittleEndian ? 1 : 0));
|
|
|
|
|
Res = Builder.CreateInsertElement(Res, Ops[1],
|
|
|
|
@ -14199,7 +14214,7 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
|
|
|
|
|
case PPC::BI__builtin_unpack_vector_int128: {
|
|
|
|
|
ConstantInt *Index = cast<ConstantInt>(Ops[1]);
|
|
|
|
|
Value *Unpacked = Builder.CreateBitCast(
|
|
|
|
|
Ops[0], llvm::VectorType::get(ConvertType(E->getType()), 2));
|
|
|
|
|
Ops[0], llvm::FixedVectorType::get(ConvertType(E->getType()), 2));
|
|
|
|
|
|
|
|
|
|
if (getTarget().isLittleEndian())
|
|
|
|
|
Index = ConstantInt::get(Index->getType(), 1 - Index->getZExtValue());
|
|
|
|
|