Implemented Neon scalar by element intrinsics.

Intrinsics implemented: the vqdmull_lane(q), vqdmulh_lane(q), vqrdmulh_lane(q),
vqdmlal_lane(q) and vqdmlsl_lane(q) scalar Neon intrinsics.

llvm-svn: 195326
This commit is contained in:
Ana Pazos 2013-11-21 07:36:33 +00:00
parent 95f3e54066
commit 2b02688fd9
4 changed files with 265 additions and 21 deletions

View File

@ -119,6 +119,12 @@ def OP_SCALAR_MULX_LN : Op;
def OP_SCALAR_MULX_LNQ : Op; def OP_SCALAR_MULX_LNQ : Op;
def OP_SCALAR_VMULX_LN : Op; def OP_SCALAR_VMULX_LN : Op;
def OP_SCALAR_VMULX_LNQ : Op; def OP_SCALAR_VMULX_LNQ : Op;
// Operation tags for the scalar saturating-doubling-multiply by-element
// intrinsics. Each name is looked up by string in NeonEmitter's OpMap and
// expanded in GenOpString into a vget(q)_lane extract feeding the matching
// scalar vqdmull/vqdmulh/vqrdmulh call. _LN variants read the lane from a
// 64-bit vector, _LNQ variants from a 128-bit vector.
def OP_SCALAR_QDMULL_LN : Op;
def OP_SCALAR_QDMULL_LNQ : Op;
def OP_SCALAR_QDMULH_LN : Op;
def OP_SCALAR_QDMULH_LNQ : Op;
def OP_SCALAR_QRDMULH_LN : Op;
def OP_SCALAR_QRDMULH_LNQ : Op;
class Inst <string n, string p, string t, Op o> { class Inst <string n, string p, string t, Op o> {
string Name = n; string Name = n;
@ -1244,4 +1250,25 @@ def SCALAR_FMLA_LANEQ : IInst<"vfma_laneq", "sssji", "SfSd">;
// Scalar Floating Point fused multiply-subtract (scalar, by element) // Scalar Floating Point fused multiply-subtract (scalar, by element)
def SCALAR_FMLS_LANE : IOpInst<"vfms_lane", "sssdi", "SfSd", OP_FMS_LN>; def SCALAR_FMLS_LANE : IOpInst<"vfms_lane", "sssdi", "SfSd", OP_FMS_LN>;
def SCALAR_FMLS_LANEQ : IOpInst<"vfms_laneq", "sssji", "SfSd", OP_FMS_LNQ>; def SCALAR_FMLS_LANEQ : IOpInst<"vfms_laneq", "sssji", "SfSd", OP_FMS_LNQ>;
// Signed Saturating Doubling Multiply Long (scalar by element)
def SCALAR_SQDMULL_LANE : SOpInst<"vqdmull_lane", "rsdi", "SsSi", OP_SCALAR_QDMULL_LN>;
def SCALAR_SQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "rsji", "SsSi", OP_SCALAR_QDMULL_LNQ>;
// Signed Saturating Doubling Multiply-Add Long (scalar by element).
// These use plain SInst (no Op tag): the builtin is lowered directly in
// clang CodeGen rather than expanded to other intrinsics in arm_neon.h.
def SCALAR_SQDMLAL_LANE : SInst<"vqdmlal_lane", "rrsdi", "SsSi">;
def SCALAR_SQDMLAL_LANEQ : SInst<"vqdmlal_laneq", "rrsji", "SsSi">;
// Signed Saturating Doubling Multiply-Subtract Long (scalar by element)
def SCALAR_SQDMLS_LANE : SInst<"vqdmlsl_lane", "rrsdi", "SsSi">;
def SCALAR_SQDMLS_LANEQ : SInst<"vqdmlsl_laneq", "rrsji", "SsSi">;
// Signed Saturating Doubling Multiply Returning High Half (scalar by element)
def SCALAR_SQDMULH_LANE : SOpInst<"vqdmulh_lane", "ssdi", "SsSi", OP_SCALAR_QDMULH_LN>;
def SCALAR_SQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "ssji", "SsSi", OP_SCALAR_QDMULH_LNQ>;
// Signed Saturating Rounding Doubling Multiply Returning High Half (scalar by element)
def SCALAR_SQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "ssdi", "SsSi", OP_SCALAR_QRDMULH_LN>;
def SCALAR_SQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "ssji", "SsSi", OP_SCALAR_QRDMULH_LNQ>;
} }

View File

@ -1772,13 +1772,52 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
// argument that specifies the vector type, need to handle each case. // argument that specifies the vector type, need to handle each case.
switch (BuiltinID) { switch (BuiltinID) {
default: break; default: break;
case AArch64::BI__builtin_neon_vqdmlalh_lane_s16 :
case AArch64::BI__builtin_neon_vqdmlalh_laneq_s16 :
case AArch64::BI__builtin_neon_vqdmlals_lane_s32 :
case AArch64::BI__builtin_neon_vqdmlals_laneq_s32 :
case AArch64::BI__builtin_neon_vqdmlslh_lane_s16 :
case AArch64::BI__builtin_neon_vqdmlslh_laneq_s16 :
case AArch64::BI__builtin_neon_vqdmlsls_lane_s32 :
case AArch64::BI__builtin_neon_vqdmlsls_laneq_s32 : {
// Scalar saturating doubling multiply-accumulate/-subtract by element:
//   a +/- sqdmull(b, c[lane])
// emitted as a vqdmull on single-lane vectors followed by a saturating
// add (vqdmlal*) or saturating sub (vqdmlsl*).
Int = Intrinsic::arm_neon_vqadds;
if (BuiltinID == AArch64::BI__builtin_neon_vqdmlslh_lane_s16 ||
BuiltinID == AArch64::BI__builtin_neon_vqdmlslh_laneq_s16 ||
BuiltinID == AArch64::BI__builtin_neon_vqdmlsls_lane_s32 ||
BuiltinID == AArch64::BI__builtin_neon_vqdmlsls_laneq_s32) {
// The vqdmlsl* builtins subtract the doubled product.
Int = Intrinsic::arm_neon_vqsubs;
}
// create vqdmull call with b * c[i]
// OpVTy: <1 x narrow-type> wrapping the multiplicands (arg 1's type);
// ResVTy: <1 x wide-type> wrapping the accumulator (arg 0's type). The
// arm_neon scalar intrinsics are modelled on single-element vectors.
llvm::Type *Ty = CGF.ConvertType(E->getArg(1)->getType());
llvm::VectorType *OpVTy = llvm::VectorType::get(Ty, 1);
Ty = CGF.ConvertType(E->getArg(0)->getType());
llvm::VectorType *ResVTy = llvm::VectorType::get(Ty, 1);
Value *F = CGF.CGM.getIntrinsic(Intrinsic::arm_neon_vqdmull, ResVTy);
Value *V = UndefValue::get(OpVTy);
llvm::Constant *CI = ConstantInt::get(CGF.Int32Ty, 0);
SmallVector<Value *, 2> MulOps;
MulOps.push_back(Ops[1]);
MulOps.push_back(Ops[2]);
// Widen the scalar multiplicand to lane 0 of a <1 x ty> vector, and pull
// element c[lane] (lane index in Ops[3]) out of the vector operand before
// widening it the same way.
MulOps[0] = CGF.Builder.CreateInsertElement(V, MulOps[0], CI);
MulOps[1] = CGF.Builder.CreateExtractElement(MulOps[1], Ops[3], "extract");
MulOps[1] = CGF.Builder.CreateInsertElement(V, MulOps[1], CI);
Value *MulRes = CGF.Builder.CreateCall2(F, MulOps[0], MulOps[1]);
// create vqadds call with a +/- vqdmull result
F = CGF.CGM.getIntrinsic(Int, ResVTy);
SmallVector<Value *, 2> AddOps;
AddOps.push_back(Ops[0]);
AddOps.push_back(MulRes);
// MulRes is already a <1 x wide-type> value; only the accumulator needs
// to be inserted into a fresh single-lane vector.
V = UndefValue::get(ResVTy);
AddOps[0] = CGF.Builder.CreateInsertElement(V, AddOps[0], CI);
Value *AddRes = CGF.Builder.CreateCall2(F, AddOps[0], AddOps[1]);
// Bitcast the <1 x ty> result back to the scalar return type; Ty still
// holds the accumulator's scalar type at this point (same bit width, so
// the vector-to-scalar bitcast is legal IR).
return CGF.Builder.CreateBitCast(AddRes, Ty);
}
case AArch64::BI__builtin_neon_vfmas_lane_f32: case AArch64::BI__builtin_neon_vfmas_lane_f32:
case AArch64::BI__builtin_neon_vfmas_laneq_f32: case AArch64::BI__builtin_neon_vfmas_laneq_f32:
case AArch64::BI__builtin_neon_vfmad_lane_f64: case AArch64::BI__builtin_neon_vfmad_lane_f64:
case AArch64::BI__builtin_neon_vfmad_laneq_f64: { case AArch64::BI__builtin_neon_vfmad_laneq_f64: {
llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType()); llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
Value *F = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty); Value *F = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
// extract lane acc += x * v[i]
Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], Ops[3], "extract"); Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
return CGF.Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]); return CGF.Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
} }
@ -1857,26 +1896,26 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
case AArch64::BI__builtin_neon_vqaddh_s16: case AArch64::BI__builtin_neon_vqaddh_s16:
case AArch64::BI__builtin_neon_vqadds_s32: case AArch64::BI__builtin_neon_vqadds_s32:
case AArch64::BI__builtin_neon_vqaddd_s64: case AArch64::BI__builtin_neon_vqaddd_s64:
Int = Intrinsic::aarch64_neon_vqadds; Int = Intrinsic::arm_neon_vqadds;
s = "vqadds"; OverloadInt = true; break; s = "vqadds"; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vqaddb_u8: case AArch64::BI__builtin_neon_vqaddb_u8:
case AArch64::BI__builtin_neon_vqaddh_u16: case AArch64::BI__builtin_neon_vqaddh_u16:
case AArch64::BI__builtin_neon_vqadds_u32: case AArch64::BI__builtin_neon_vqadds_u32:
case AArch64::BI__builtin_neon_vqaddd_u64: case AArch64::BI__builtin_neon_vqaddd_u64:
Int = Intrinsic::aarch64_neon_vqaddu; Int = Intrinsic::arm_neon_vqaddu;
s = "vqaddu"; OverloadInt = true; break; s = "vqaddu"; OverloadInt = true; break;
// Scalar Saturating Sub // Scalar Saturating Sub
case AArch64::BI__builtin_neon_vqsubb_s8: case AArch64::BI__builtin_neon_vqsubb_s8:
case AArch64::BI__builtin_neon_vqsubh_s16: case AArch64::BI__builtin_neon_vqsubh_s16:
case AArch64::BI__builtin_neon_vqsubs_s32: case AArch64::BI__builtin_neon_vqsubs_s32:
case AArch64::BI__builtin_neon_vqsubd_s64: case AArch64::BI__builtin_neon_vqsubd_s64:
Int = Intrinsic::aarch64_neon_vqsubs; Int = Intrinsic::arm_neon_vqsubs;
s = "vqsubs"; OverloadInt = true; break; s = "vqsubs"; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vqsubb_u8: case AArch64::BI__builtin_neon_vqsubb_u8:
case AArch64::BI__builtin_neon_vqsubh_u16: case AArch64::BI__builtin_neon_vqsubh_u16:
case AArch64::BI__builtin_neon_vqsubs_u32: case AArch64::BI__builtin_neon_vqsubs_u32:
case AArch64::BI__builtin_neon_vqsubd_u64: case AArch64::BI__builtin_neon_vqsubd_u64:
Int = Intrinsic::aarch64_neon_vqsubu; Int = Intrinsic::arm_neon_vqsubu;
s = "vqsubu"; OverloadInt = true; break; s = "vqsubu"; OverloadInt = true; break;
// Scalar Shift Left // Scalar Shift Left
case AArch64::BI__builtin_neon_vshld_s64: case AArch64::BI__builtin_neon_vshld_s64:
@ -2270,7 +2309,7 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
// Signed Saturating Doubling Multiply Long // Signed Saturating Doubling Multiply Long
case AArch64::BI__builtin_neon_vqdmullh_s16: case AArch64::BI__builtin_neon_vqdmullh_s16:
case AArch64::BI__builtin_neon_vqdmulls_s32: case AArch64::BI__builtin_neon_vqdmulls_s32:
Int = Intrinsic::aarch64_neon_vqdmull; Int = Intrinsic::arm_neon_vqdmull;
s = "vqdmull"; OverloadWideInt = true; break; s = "vqdmull"; OverloadWideInt = true; break;
// Scalar Signed Saturating Extract Unsigned Narrow // Scalar Signed Saturating Extract Unsigned Narrow
case AArch64::BI__builtin_neon_vqmovunh_s16: case AArch64::BI__builtin_neon_vqmovunh_s16:

View File

@ -61,71 +61,195 @@ float64_t test_vmulxd_laneq_f64(float64_t a, float64x2_t b) {
// CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] // CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
} }
// CHECK_AARCH64: test_vmulx_lane_f64 // CHECK: test_vmulx_lane_f64
float64x1_t test_vmulx_lane_f64(float64x1_t a, float64x1_t b) { float64x1_t test_vmulx_lane_f64(float64x1_t a, float64x1_t b) {
return vmulx_lane_f64(a, b, 0); return vmulx_lane_f64(a, b, 0);
// CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] // CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
} }
// CHECK_AARCH64: test_vmulx_laneq_f64_0 // CHECK: test_vmulx_laneq_f64_0
float64x1_t test_vmulx_laneq_f64_0(float64x1_t a, float64x2_t b) { float64x1_t test_vmulx_laneq_f64_0(float64x1_t a, float64x2_t b) {
return vmulx_laneq_f64(a, b, 0); return vmulx_laneq_f64(a, b, 0);
// CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] // CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
} }
// CHECK_AARCH64: test_vmulx_laneq_f64_1 // CHECK: test_vmulx_laneq_f64_1
float64x1_t test_vmulx_laneq_f64_1(float64x1_t a, float64x2_t b) { float64x1_t test_vmulx_laneq_f64_1(float64x1_t a, float64x2_t b) {
return vmulx_laneq_f64(a, b, 1); return vmulx_laneq_f64(a, b, 1);
// CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] // CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
} }
// CHECK_AARCH64: test_vfmas_lane_f32 // CHECK: test_vfmas_lane_f32
float32_t test_vfmas_lane_f32(float32_t a, float32_t b, float32x2_t c) { float32_t test_vfmas_lane_f32(float32_t a, float32_t b, float32x2_t c) {
return vfmas_lane_f32(a, b, c, 1); return vfmas_lane_f32(a, b, c, 1);
// CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] // CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
} }
// CHECK_AARCH64: test_vfmad_lane_f64 // CHECK: test_vfmad_lane_f64
float64_t test_vfmad_lane_f64(float64_t a, float64_t b, float64x1_t c) { float64_t test_vfmad_lane_f64(float64_t a, float64_t b, float64x1_t c) {
return vfmad_lane_f64(a, b, c, 0); return vfmad_lane_f64(a, b, c, 0);
// CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] // CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
} }
// CHECK_AARCH64: test_vfmad_laneq_f64 // CHECK: test_vfmad_laneq_f64
float64_t test_vfmad_laneq_f64(float64_t a, float64_t b, float64x2_t c) { float64_t test_vfmad_laneq_f64(float64_t a, float64_t b, float64x2_t c) {
return vfmad_laneq_f64(a, b, c, 1); return vfmad_laneq_f64(a, b, c, 1);
// CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] // CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
} }
// CHECK_AARCH64: test_vfmss_lane_f32 // CHECK: test_vfmss_lane_f32
float32_t test_vfmss_lane_f32(float32_t a, float32_t b, float32x2_t c) { float32_t test_vfmss_lane_f32(float32_t a, float32_t b, float32x2_t c) {
return vfmss_lane_f32(a, b, c, 1); return vfmss_lane_f32(a, b, c, 1);
// CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] // CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
} }
// CHECK_AARCH64: test_vfma_lane_f64 // CHECK: test_vfma_lane_f64
float64x1_t test_vfma_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) { float64x1_t test_vfma_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) {
return vfma_lane_f64(a, b, v, 0); return vfma_lane_f64(a, b, v, 0);
// CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] // CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
} }
// CHECK_AARCH64: test_vfms_lane_f64 // CHECK: test_vfms_lane_f64
float64x1_t test_vfms_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) { float64x1_t test_vfms_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) {
return vfms_lane_f64(a, b, v, 0); return vfms_lane_f64(a, b, v, 0);
// CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] // CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
} }
// CHECK_AARCH64: test_vfma_laneq_f64 // CHECK: test_vfma_laneq_f64
float64x1_t test_vfma_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) { float64x1_t test_vfma_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) {
return vfma_laneq_f64(a, b, v, 0); return vfma_laneq_f64(a, b, v, 0);
// CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] // CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
} }
// CHECK_AARCH64: test_vfms_laneq_f64 // CHECK: test_vfms_laneq_f64
float64x1_t test_vfms_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) { float64x1_t test_vfms_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) {
return vfms_laneq_f64(a, b, v, 0); return vfms_laneq_f64(a, b, v, 0);
// CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] // CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
} }
// Scalar signed saturating doubling multiply long, by element. Each test
// verifies the lane index is folded into the by-element form of the
// instruction (narrow scalar input register, widened scalar result).
// CHECK: test_vqdmullh_lane_s16
int32_t test_vqdmullh_lane_s16(int16_t a, int16x4_t b) {
return vqdmullh_lane_s16(a, b, 3);
// CHECK: sqdmull {{s[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[3]
}
// CHECK: test_vqdmulls_lane_s32
int64_t test_vqdmulls_lane_s32(int32_t a, int32x2_t b) {
return vqdmulls_lane_s32(a, b, 1);
// CHECK: sqdmull {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
}
// laneq variants: lane is read from a 128-bit vector, so the maximum
// legal lane index doubles (7 for .h, 3 for .s).
// CHECK: test_vqdmullh_laneq_s16
int32_t test_vqdmullh_laneq_s16(int16_t a, int16x8_t b) {
return vqdmullh_laneq_s16(a, b, 7);
// CHECK: sqdmull {{s[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[7]
}
// CHECK: test_vqdmulls_laneq_s32
int64_t test_vqdmulls_laneq_s32(int32_t a, int32x4_t b) {
return vqdmulls_laneq_s32(a, b, 3);
// CHECK: sqdmull {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
}
// Scalar signed saturating (rounding) doubling multiply returning high
// half, by element. Result width equals operand width, so source and
// destination use the same register class.
// CHECK: test_vqdmulhh_lane_s16
int16_t test_vqdmulhh_lane_s16(int16_t a, int16x4_t b) {
return vqdmulhh_lane_s16(a, b, 3);
// CHECK: sqdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[3]
}
// CHECK: test_vqdmulhs_lane_s32
int32_t test_vqdmulhs_lane_s32(int32_t a, int32x2_t b) {
return vqdmulhs_lane_s32(a, b, 1);
// CHECK: sqdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
}
// CHECK: test_vqdmulhh_laneq_s16
int16_t test_vqdmulhh_laneq_s16(int16_t a, int16x8_t b) {
return vqdmulhh_laneq_s16(a, b, 7);
// CHECK: sqdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[7]
}
// CHECK: test_vqdmulhs_laneq_s32
int32_t test_vqdmulhs_laneq_s32(int32_t a, int32x4_t b) {
return vqdmulhs_laneq_s32(a, b, 3);
// CHECK: sqdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
}
// Rounding variants of the same operation.
// CHECK: test_vqrdmulhh_lane_s16
int16_t test_vqrdmulhh_lane_s16(int16_t a, int16x4_t b) {
return vqrdmulhh_lane_s16(a, b, 3);
// CHECK: sqrdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[3]
}
// CHECK: test_vqrdmulhs_lane_s32
int32_t test_vqrdmulhs_lane_s32(int32_t a, int32x2_t b) {
return vqrdmulhs_lane_s32(a, b, 1);
// CHECK: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
}
// CHECK: test_vqrdmulhh_laneq_s16
int16_t test_vqrdmulhh_laneq_s16(int16_t a, int16x8_t b) {
return vqrdmulhh_laneq_s16(a, b, 7);
// CHECK: sqrdmulh {{h[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[7]
}
// CHECK: test_vqrdmulhs_laneq_s32
int32_t test_vqrdmulhs_laneq_s32(int32_t a, int32x4_t b) {
return vqrdmulhs_laneq_s32(a, b, 3);
// CHECK: sqrdmulh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
}
// Scalar signed saturating doubling multiply-accumulate/-subtract long,
// by element. These verify the clang CodeGen path (vqdmull + vqadds /
// vqsubs on single-lane vectors) folds to one accumulating instruction.
// CHECK: test_vqdmlalh_lane_s16
int32_t test_vqdmlalh_lane_s16(int32_t a, int16_t b, int16x4_t c) {
return vqdmlalh_lane_s16(a, b, c, 3);
// CHECK: sqdmlal {{s[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[3]
}
// CHECK: test_vqdmlals_lane_s32
int64_t test_vqdmlals_lane_s32(int64_t a, int32_t b, int32x2_t c) {
return vqdmlals_lane_s32(a, b, c, 1);
// CHECK: sqdmlal {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
}
// CHECK: test_vqdmlalh_laneq_s16
int32_t test_vqdmlalh_laneq_s16(int32_t a, int16_t b, int16x8_t c) {
return vqdmlalh_laneq_s16(a, b, c, 7);
// CHECK: sqdmlal {{s[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[7]
}
// CHECK: test_vqdmlals_laneq_s32
int64_t test_vqdmlals_laneq_s32(int64_t a, int32_t b, int32x4_t c) {
return vqdmlals_laneq_s32(a, b, c, 3);
// CHECK: sqdmlal {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
}
// Subtracting variants.
// CHECK: test_vqdmlslh_lane_s16
int32_t test_vqdmlslh_lane_s16(int32_t a, int16_t b, int16x4_t c) {
return vqdmlslh_lane_s16(a, b, c, 3);
// CHECK: sqdmlsl {{s[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[3]
}
// CHECK: test_vqdmlsls_lane_s32
int64_t test_vqdmlsls_lane_s32(int64_t a, int32_t b, int32x2_t c) {
return vqdmlsls_lane_s32(a, b, c, 1);
// CHECK: sqdmlsl {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
}
// CHECK: test_vqdmlslh_laneq_s16
int32_t test_vqdmlslh_laneq_s16(int32_t a, int16_t b, int16x8_t c) {
return vqdmlslh_laneq_s16(a, b, c, 7);
// CHECK: sqdmlsl {{s[0-9]+}}, {{h[0-9]+}}, {{v[0-9]+}}.h[7]
}
// CHECK: test_vqdmlsls_laneq_s32
int64_t test_vqdmlsls_laneq_s32(int64_t a, int32_t b, int32x4_t c) {
return vqdmlsls_laneq_s32(a, b, c, 3);
// CHECK: sqdmlsl {{d[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
}

View File

@ -140,7 +140,13 @@ enum OpKind {
OpScalarMulXLane, OpScalarMulXLane,
OpScalarMulXLaneQ, OpScalarMulXLaneQ,
OpScalarVMulXLane, OpScalarVMulXLane,
OpScalarVMulXLaneQ OpScalarVMulXLaneQ,
OpScalarQDMullLane,
OpScalarQDMullLaneQ,
OpScalarQDMulHiLane,
OpScalarQDMulHiLaneQ,
OpScalarQRDMulHiLane,
OpScalarQRDMulHiLaneQ
}; };
enum ClassKind { enum ClassKind {
@ -307,6 +313,13 @@ public:
OpMap["OP_SCALAR_MULX_LNQ"]= OpScalarMulXLaneQ; OpMap["OP_SCALAR_MULX_LNQ"]= OpScalarMulXLaneQ;
OpMap["OP_SCALAR_VMULX_LN"]= OpScalarVMulXLane; OpMap["OP_SCALAR_VMULX_LN"]= OpScalarVMulXLane;
OpMap["OP_SCALAR_VMULX_LNQ"]= OpScalarVMulXLaneQ; OpMap["OP_SCALAR_VMULX_LNQ"]= OpScalarVMulXLaneQ;
OpMap["OP_SCALAR_QDMULL_LN"] = OpScalarQDMullLane;
OpMap["OP_SCALAR_QDMULL_LNQ"] = OpScalarQDMullLaneQ;
OpMap["OP_SCALAR_QDMULH_LN"] = OpScalarQDMulHiLane;
OpMap["OP_SCALAR_QDMULH_LNQ"] = OpScalarQDMulHiLaneQ;
OpMap["OP_SCALAR_QRDMULH_LN"] = OpScalarQRDMulHiLane;
OpMap["OP_SCALAR_QRDMULH_LNQ"] = OpScalarQRDMulHiLaneQ;
Record *SI = R.getClass("SInst"); Record *SI = R.getClass("SInst");
Record *II = R.getClass("IInst"); Record *II = R.getClass("IInst");
@ -2033,8 +2046,8 @@ static std::string GenOpString(const std::string &name, OpKind op,
case OpScalarMulLane: { case OpScalarMulLane: {
std::string typeCode = ""; std::string typeCode = "";
InstructionTypeCode(typestr, ClassS, quad, typeCode); InstructionTypeCode(typestr, ClassS, quad, typeCode);
s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode + s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
"(__b, __c);\\\n __a * __d1;"; "(__b, __c);\\\n __a * __d1;";
break; break;
} }
case OpScalarMulLaneQ: { case OpScalarMulLaneQ: {
@ -2100,7 +2113,48 @@ static std::string GenOpString(const std::string &name, OpKind op,
" vset_lane_" + typeCode + "(__f1, __g1, 0);"; " vset_lane_" + typeCode + "(__f1, __g1, 0);";
break; break;
} }
case OpScalarQDMullLane: {
// Scalar saturating doubling multiply long by element: expands to
//   vqdmull_<type>(__a, vget_lane_<type>(__b, __c))
// Fix: extract the lane from the __-prefixed local copy __b, as the
// sibling *_LaneQ/MulHi cases below and OpScalarMulLane above do; the
// original emitted the bare name 'b', inconsistent with every other
// by-element expansion generated here.
std::string typeCode = "";
InstructionTypeCode(typestr, ClassS, quad, typeCode);
s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
"vget_lane_" + typeCode + "(__b, __c));";
break;
}
case OpScalarQDMullLaneQ: {
// Same expansion, but the second operand is a 128-bit vector, so the
// lane is read with vgetq_lane_<type> (same 'b' -> '__b' fix applied).
std::string typeCode = "";
InstructionTypeCode(typestr, ClassS, quad, typeCode);
s += MangleName("vqdmull", typestr, ClassS) + "(__a, " +
"vgetq_lane_" + typeCode + "(__b, __c));";
break;
}
case OpScalarQDMulHiLane: {
// Scalar saturating doubling multiply returning high half, by element:
//   vqdmulh_<type>(__a, vget_lane_<type>(__b, __c))
std::string typeCode = "";
InstructionTypeCode(typestr, ClassS, quad, typeCode);
s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
"vget_lane_" + typeCode + "(__b, __c));";
break;
}
case OpScalarQDMulHiLaneQ: {
// As above, lane read from a 128-bit vector via vgetq_lane_<type>.
std::string typeCode = "";
InstructionTypeCode(typestr, ClassS, quad, typeCode);
s += MangleName("vqdmulh", typestr, ClassS) + "(__a, " +
"vgetq_lane_" + typeCode + "(__b, __c));";
break;
}
case OpScalarQRDMulHiLane: {
// Rounding variant of the high-half multiply, 64-bit lane source.
std::string typeCode = "";
InstructionTypeCode(typestr, ClassS, quad, typeCode);
s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
"vget_lane_" + typeCode + "(__b, __c));";
break;
}
case OpScalarQRDMulHiLaneQ: {
// Rounding variant, 128-bit lane source.
std::string typeCode = "";
InstructionTypeCode(typestr, ClassS, quad, typeCode);
s += MangleName("vqrdmulh", typestr, ClassS) + "(__a, " +
"vgetq_lane_" + typeCode + "(__b, __c));";
break;
}
default: default:
PrintFatalError("unknown OpKind!"); PrintFatalError("unknown OpKind!");
} }