Implemented the AArch64 NEON intrinsic vcopy_lane for floating-point types.

llvm-svn: 194042
This commit is contained in:
Kevin Qin 2013-11-05 02:05:44 +00:00
parent 97f6aaa8ad
commit 9eece7b5e0
4 changed files with 266 additions and 17 deletions

View File

@ -98,7 +98,9 @@ def OP_DIV : Op;
def OP_LONG_HI : Op;
def OP_NARROW_HI : Op;
def OP_MOVL_HI : Op;
def OP_COPY : Op;
def OP_COPY_LN : Op;
def OP_COPYQ_LN : Op;
def OP_COPY_LNQ : Op;
class Inst <string n, string p, string t, Op o> {
string Name = n;
@ -669,13 +671,18 @@ def VQDMLSL_HIGH : SOpInst<"vqdmlsl_high", "wwkk", "si", OP_QDMLSLHi>;
////////////////////////////////////////////////////////////////////////////////
// Extract or insert element from vector
def GET_LANE : IInst<"vget_lane", "sdi",
"csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPs">;
"csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPsfdQfQd">;
def SET_LANE : IInst<"vset_lane", "dsdi",
"csilPcPsUiUlUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPshfdQhQfQd">;
"csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPsfdQfQd">;
def COPY_LANE : IOpInst<"vcopy_lane", "ddidi",
"csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPs", OP_COPY>;
def COPY_LANEQ : IOpInst<"vcopy_laneq", "ggidi",
"QcQsQiQlQUcQUsQUiQUlQPcQPs", OP_COPY>;
"csiPcPsUcUsUiPcPsf", OP_COPY_LN>;
def COPYQ_LANE : IOpInst<"vcopy_lane", "ddigi",
"QcQsQiQlQUcQUsQUiQUlQPcQPsQfQd", OP_COPYQ_LN>;
def COPY_LANEQ : IOpInst<"vcopy_laneq", "ddiki",
"csiPcPsUcUsUif", OP_COPY_LNQ>;
def COPYQ_LANEQ : IOpInst<"vcopy_laneq", "ddidi",
"QcQsQiQlQUcQUsQUiQUlQPcQPsQfd", OP_COPY_LN>;
////////////////////////////////////////////////////////////////////////////////
// Set all lanes to same value

View File

@ -2441,23 +2441,25 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case AArch64::BI__builtin_neon_vget_lane_i16:
case AArch64::BI__builtin_neon_vget_lane_i32:
case AArch64::BI__builtin_neon_vget_lane_i64:
case AArch64::BI__builtin_neon_vget_lane_f32:
case AArch64::BI__builtin_neon_vget_lane_f64:
case AArch64::BI__builtin_neon_vgetq_lane_i8:
case AArch64::BI__builtin_neon_vgetq_lane_i16:
case AArch64::BI__builtin_neon_vgetq_lane_i32:
case AArch64::BI__builtin_neon_vgetq_lane_i64:
case AArch64::BI__builtin_neon_vgetq_lane_f32:
case AArch64::BI__builtin_neon_vgetq_lane_f64:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vget_lane_i8, E);
case AArch64::BI__builtin_neon_vset_lane_i8:
case AArch64::BI__builtin_neon_vset_lane_i16:
case AArch64::BI__builtin_neon_vset_lane_i32:
case AArch64::BI__builtin_neon_vset_lane_i64:
case AArch64::BI__builtin_neon_vset_lane_f16:
case AArch64::BI__builtin_neon_vset_lane_f32:
case AArch64::BI__builtin_neon_vset_lane_f64:
case AArch64::BI__builtin_neon_vsetq_lane_i8:
case AArch64::BI__builtin_neon_vsetq_lane_i16:
case AArch64::BI__builtin_neon_vsetq_lane_i32:
case AArch64::BI__builtin_neon_vsetq_lane_i64:
case AArch64::BI__builtin_neon_vsetq_lane_f16:
case AArch64::BI__builtin_neon_vsetq_lane_f32:
case AArch64::BI__builtin_neon_vsetq_lane_f64:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vset_lane_i8, E);

View File

@ -281,16 +281,238 @@ int16x4_t test_vcopy_lane_s16(int16x4_t v1, int16x4_t v2) {
// CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3]
}
// Copies lane 3 of v2 into lane 5 of v1 (both poly8x8_t); the expected codegen is an "ins" from .b[3] to .b[5].
poly8x8_t test_vcopy_lane_p8(poly8x8_t v1, poly8x8_t v2) {
// CHECK: test_vcopy_lane_p8
return vcopy_lane_p8(v1, 5, v2, 3);
// CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
}
// Copies lane 3 of v2 into lane 2 of v1 (both poly16x4_t); the expected codegen is an "ins" from .h[3] to .h[2].
poly16x4_t test_vcopy_lane_p16(poly16x4_t v1, poly16x4_t v2) {
// CHECK: test_vcopy_lane_p16
return vcopy_lane_p16(v1, 2, v2, 3);
// CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3]
}
// Copies lane 1 of v2 into lane 0 of v1 (both int32x2_t); the expected codegen is an "ins" from .s[1] to .s[0].
int32x2_t test_vcopy_lane_s32(int32x2_t v1, int32x2_t v2) {
// CHECK: test_vcopy_lane_s32
return vcopy_lane_s32(v1, 0, v2, 1);
// CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
}
int64x1_t test_vcopy_lane_s64(int64x1_t v1, int64x1_t v2) {
// CHECK: test_vcopy_lane_s64
return vcopy_lane_s64(v1, 0, v2, 0);
// CHECK: fmov {{d[0-9]+}}, {{d[0-9]+}}
// Copies lane 1 of v2 into lane 0 of v1 (both float32x2_t); the expected codegen is an "ins" from .s[1] to .s[0].
float32x2_t test_vcopy_lane_f32(float32x2_t v1, float32x2_t v2) {
// CHECK: test_vcopy_lane_f32
return vcopy_lane_f32(v1, 0, v2, 1);
// CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
}
// Copies lane 3 of v2 into lane 5 of v1 (both uint8x8_t); the expected codegen is an "ins" from .b[3] to .b[5].
uint8x8_t test_vcopy_lane_u8(uint8x8_t v1, uint8x8_t v2) {
// CHECK: test_vcopy_lane_u8
return vcopy_lane_u8(v1, 5, v2, 3);
// CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
}
// Copies lane 3 of v2 into lane 2 of v1 (both uint16x4_t); the expected codegen is an "ins" from .h[3] to .h[2].
uint16x4_t test_vcopy_lane_u16(uint16x4_t v1, uint16x4_t v2) {
// CHECK: test_vcopy_lane_u16
return vcopy_lane_u16(v1, 2, v2, 3);
// CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3]
}
// Copies lane 1 of v2 into lane 0 of v1 (both uint32x2_t); the expected codegen is an "ins" from .s[1] to .s[0].
uint32x2_t test_vcopy_lane_u32(uint32x2_t v1, uint32x2_t v2) {
// CHECK: test_vcopy_lane_u32
return vcopy_lane_u32(v1, 0, v2, 1);
// CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
}
// Copies lane 3 of the int8x16_t source v2 into lane 5 of the int8x8_t destination v1; expects an "ins" from .b[3] to .b[5].
int8x8_t test_vcopy_laneq_s8(int8x8_t v1, int8x16_t v2) {
// CHECK: test_vcopy_laneq_s8
return vcopy_laneq_s8(v1, 5, v2, 3);
// CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
}
// Copies lane 3 of the int16x8_t source v2 into lane 2 of the int16x4_t destination v1; expects an "ins" from .h[3] to .h[2].
int16x4_t test_vcopy_laneq_s16(int16x4_t v1, int16x8_t v2) {
// CHECK: test_vcopy_laneq_s16
return vcopy_laneq_s16(v1, 2, v2, 3);
// CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3]
}
// Copies lane 3 of the poly8x16_t source v2 into lane 5 of the poly8x8_t destination v1; expects an "ins" from .b[3] to .b[5].
poly8x8_t test_vcopy_laneq_p8(poly8x8_t v1, poly8x16_t v2) {
// CHECK: test_vcopy_laneq_p8
return vcopy_laneq_p8(v1, 5, v2, 3);
// CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
}
// Copies lane 3 of the poly16x8_t source v2 into lane 2 of the poly16x4_t destination v1; expects an "ins" from .h[3] to .h[2].
poly16x4_t test_vcopy_laneq_p16(poly16x4_t v1, poly16x8_t v2) {
// CHECK: test_vcopy_laneq_p16
return vcopy_laneq_p16(v1, 2, v2, 3);
// CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3]
}
// Copies lane 1 of the int32x4_t source v2 into lane 0 of the int32x2_t destination v1; expects an "ins" from .s[1] to .s[0].
int32x2_t test_vcopy_laneq_s32(int32x2_t v1, int32x4_t v2) {
// CHECK: test_vcopy_laneq_s32
return vcopy_laneq_s32(v1, 0, v2, 1);
// CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
}
// Copies lane 1 of the float32x4_t source v2 into lane 0 of the float32x2_t destination v1; expects an "ins" from .s[1] to .s[0].
float32x2_t test_vcopy_laneq_f32(float32x2_t v1, float32x4_t v2) {
// CHECK: test_vcopy_laneq_f32
return vcopy_laneq_f32(v1, 0, v2, 1);
// CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
}
// Copies lane 3 of the uint8x16_t source v2 into lane 5 of the uint8x8_t destination v1; expects an "ins" from .b[3] to .b[5].
uint8x8_t test_vcopy_laneq_u8(uint8x8_t v1, uint8x16_t v2) {
// CHECK: test_vcopy_laneq_u8
return vcopy_laneq_u8(v1, 5, v2, 3);
// CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
}
// Copies lane 3 of the uint16x8_t source v2 into lane 2 of the uint16x4_t destination v1; expects an "ins" from .h[3] to .h[2].
uint16x4_t test_vcopy_laneq_u16(uint16x4_t v1, uint16x8_t v2) {
// CHECK: test_vcopy_laneq_u16
return vcopy_laneq_u16(v1, 2, v2, 3);
// CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3]
}
// Copies lane 1 of the uint32x4_t source v2 into lane 0 of the uint32x2_t destination v1; expects an "ins" from .s[1] to .s[0].
uint32x2_t test_vcopy_laneq_u32(uint32x2_t v1, uint32x4_t v2) {
// CHECK: test_vcopy_laneq_u32
return vcopy_laneq_u32(v1, 0, v2, 1);
// CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
}
// Copies lane 3 of the int8x8_t source v2 into lane 5 of the int8x16_t destination v1; expects an "ins" from .b[3] to .b[5].
int8x16_t test_vcopyq_lane_s8(int8x16_t v1, int8x8_t v2) {
// CHECK: test_vcopyq_lane_s8
return vcopyq_lane_s8(v1, 5, v2, 3);
// CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
}
// Copies lane 3 of the int16x4_t source v2 into lane 2 of the int16x8_t destination v1; expects an "ins" from .h[3] to .h[2].
int16x8_t test_vcopyq_lane_s16(int16x8_t v1, int16x4_t v2) {
// CHECK: test_vcopyq_lane_s16
return vcopyq_lane_s16(v1, 2, v2, 3);
// CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3]
}
// Copies lane 3 of the poly8x8_t source v2 into lane 5 of the poly8x16_t destination v1; expects an "ins" from .b[3] to .b[5].
poly8x16_t test_vcopyq_lane_p8(poly8x16_t v1, poly8x8_t v2) {
// CHECK: test_vcopyq_lane_p8
return vcopyq_lane_p8(v1, 5, v2, 3);
// CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
}
// Copies lane 3 of the poly16x4_t source v2 into lane 2 of the poly16x8_t destination v1; expects an "ins" from .h[3] to .h[2].
poly16x8_t test_vcopyq_lane_p16(poly16x8_t v1, poly16x4_t v2) {
// CHECK: test_vcopyq_lane_p16
return vcopyq_lane_p16(v1, 2, v2, 3);
// CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3]
}
// Copies lane 1 of the int32x2_t source v2 into lane 0 of the int32x4_t destination v1; expects an "ins" from .s[1] to .s[0].
int32x4_t test_vcopyq_lane_s32(int32x4_t v1, int32x2_t v2) {
// CHECK: test_vcopyq_lane_s32
return vcopyq_lane_s32(v1, 0, v2, 1);
// CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
}
// Copies lane 0 of the int64x1_t source v2 into lane 1 of the int64x2_t destination v1; expects an "ins" from .d[0] to .d[1].
int64x2_t test_vcopyq_lane_s64(int64x2_t v1, int64x1_t v2) {
// CHECK: test_vcopyq_lane_s64
return vcopyq_lane_s64(v1, 1, v2, 0);
// CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
}
// Copies lane 1 of the float32x2_t source v2 into lane 0 of the float32x4_t destination v1; expects an "ins" from .s[1] to .s[0].
float32x4_t test_vcopyq_lane_f32(float32x4_t v1, float32x2_t v2) {
// CHECK: test_vcopyq_lane_f32
return vcopyq_lane_f32(v1, 0, v2, 1);
// CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
}
// Copies lane 0 of the float64x1_t source v2 into lane 1 of the float64x2_t destination v1; expects an "ins" from .d[0] to .d[1].
float64x2_t test_vcopyq_lane_f64(float64x2_t v1, float64x1_t v2) {
// CHECK: test_vcopyq_lane_f64
return vcopyq_lane_f64(v1, 1, v2, 0);
// CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
}
// Copies lane 3 of the uint8x8_t source v2 into lane 5 of the uint8x16_t destination v1; expects an "ins" from .b[3] to .b[5].
uint8x16_t test_vcopyq_lane_u8(uint8x16_t v1, uint8x8_t v2) {
// CHECK: test_vcopyq_lane_u8
return vcopyq_lane_u8(v1, 5, v2, 3);
// CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
}
// Copies lane 3 of the uint16x4_t source v2 into lane 2 of the uint16x8_t destination v1; expects an "ins" from .h[3] to .h[2].
uint16x8_t test_vcopyq_lane_u16(uint16x8_t v1, uint16x4_t v2) {
// CHECK: test_vcopyq_lane_u16
return vcopyq_lane_u16(v1, 2, v2, 3);
// CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3]
}
// Copies lane 1 of the uint32x2_t source v2 into lane 0 of the uint32x4_t destination v1; expects an "ins" from .s[1] to .s[0].
uint32x4_t test_vcopyq_lane_u32(uint32x4_t v1, uint32x2_t v2) {
// CHECK: test_vcopyq_lane_u32
return vcopyq_lane_u32(v1, 0, v2, 1);
// CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
}
// Copies lane 0 of the uint64x1_t source v2 into lane 1 of the uint64x2_t destination v1; expects an "ins" from .d[0] to .d[1].
uint64x2_t test_vcopyq_lane_u64(uint64x2_t v1, uint64x1_t v2) {
// CHECK: test_vcopyq_lane_u64
return vcopyq_lane_u64(v1, 1, v2, 0);
// CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
}
// Copies lane 3 of v2 into lane 5 of v1 (both int8x16_t); the expected codegen is an "ins" from .b[3] to .b[5].
int8x16_t test_vcopyq_laneq_s8(int8x16_t v1, int8x16_t v2) {
// CHECK: test_vcopyq_laneq_s8
return vcopyq_laneq_s8(v1, 5, v2, 3);
// CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
}
// Copies lane 3 of v2 into lane 2 of v1 (both int16x8_t); the expected codegen is an "ins" from .h[3] to .h[2].
int16x8_t test_vcopyq_laneq_s16(int16x8_t v1, int16x8_t v2) {
// CHECK: test_vcopyq_laneq_s16
return vcopyq_laneq_s16(v1, 2, v2, 3);
// CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3]
}
// Copies lane 3 of v2 into lane 5 of v1 (both poly8x16_t); the expected codegen is an "ins" from .b[3] to .b[5].
poly8x16_t test_vcopyq_laneq_p8(poly8x16_t v1, poly8x16_t v2) {
// CHECK: test_vcopyq_laneq_p8
return vcopyq_laneq_p8(v1, 5, v2, 3);
// CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
}
// Copies lane 3 of v2 into lane 2 of v1 (both poly16x8_t); the expected codegen is an "ins" from .h[3] to .h[2].
poly16x8_t test_vcopyq_laneq_p16(poly16x8_t v1, poly16x8_t v2) {
// CHECK: test_vcopyq_laneq_p16
return vcopyq_laneq_p16(v1, 2, v2, 3);
// CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3]
}
// Copies lane 1 of v2 into lane 0 of v1 (both int32x4_t); the expected codegen is an "ins" from .s[1] to .s[0].
int32x4_t test_vcopyq_laneq_s32(int32x4_t v1, int32x4_t v2) {
// CHECK: test_vcopyq_laneq_s32
return vcopyq_laneq_s32(v1, 0, v2, 1);
// CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
}
// Copies lane 1 of v2 into lane 0 of v1 (both float32x4_t); the expected codegen is an "ins" from .s[1] to .s[0].
float32x4_t test_vcopyq_laneq_f32(float32x4_t v1, float32x4_t v2) {
// CHECK: test_vcopyq_laneq_f32
return vcopyq_laneq_f32(v1, 0, v2, 1);
// CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
}
// Copies lane 1 of v2 into lane 1 of v1 (both int64x2_t); the expected codegen is an "ins" from .d[1] to .d[1].
int64x2_t test_vcopyq_laneq_s64(int64x2_t v1, int64x2_t v2) {
// CHECK: test_vcopyq_laneq_s64
return vcopyq_laneq_s64(v1, 1, v2, 1);
// CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[1]
}
// Copies lane 3 of v2 into lane 5 of v1 (both uint8x16_t); the expected codegen is an "ins" from .b[3] to .b[5].
uint8x16_t test_vcopyq_laneq_u8(uint8x16_t v1, uint8x16_t v2) {
// CHECK: test_vcopyq_laneq_u8
return vcopyq_laneq_u8(v1, 5, v2, 3);
// CHECK: ins {{v[0-9]+}}.b[5], {{v[0-9]+}}.b[3]
}
// Copies lane 3 of v2 into lane 2 of v1 (both uint16x8_t); the expected codegen is an "ins" from .h[3] to .h[2].
uint16x8_t test_vcopyq_laneq_u16(uint16x8_t v1, uint16x8_t v2) {
// CHECK: test_vcopyq_laneq_u16
return vcopyq_laneq_u16(v1, 2, v2, 3);
// CHECK: ins {{v[0-9]+}}.h[2], {{v[0-9]+}}.h[3]
}
// Copies lane 1 of v2 into lane 0 of v1 (both uint32x4_t); the expected codegen is an "ins" from .s[1] to .s[0].
uint32x4_t test_vcopyq_laneq_u32(uint32x4_t v1, uint32x4_t v2) {
// CHECK: test_vcopyq_laneq_u32
return vcopyq_laneq_u32(v1, 0, v2, 1);
// CHECK: ins {{v[0-9]+}}.s[0], {{v[0-9]+}}.s[1]
}
// Copies lane 1 of v2 into lane 0 of v1 (both uint64x2_t); the expected codegen is an "ins" from .d[1] to .d[0].
uint64x2_t test_vcopyq_laneq_u64(uint64x2_t v1, uint64x2_t v2) {
// CHECK: test_vcopyq_laneq_u64
return vcopyq_laneq_u64(v1, 0, v2, 1);
// CHECK: ins {{v[0-9]+}}.d[0], {{v[0-9]+}}.d[1]
}
int8x8_t test_vcreate_s8(uint64_t v1) {

View File

@ -120,7 +120,9 @@ enum OpKind {
OpLongHi,
OpNarrowHi,
OpMovlHi,
OpCopy
OpCopyLane,
OpCopyQLane,
OpCopyLaneQ
};
enum ClassKind {
@ -265,7 +267,9 @@ public:
OpMap["OP_LONG_HI"] = OpLongHi;
OpMap["OP_NARROW_HI"] = OpNarrowHi;
OpMap["OP_MOVL_HI"] = OpMovlHi;
OpMap["OP_COPY"] = OpCopy;
OpMap["OP_COPY_LN"] = OpCopyLane;
OpMap["OP_COPYQ_LN"] = OpCopyQLane;
OpMap["OP_COPY_LNQ"] = OpCopyLaneQ;
Record *SI = R.getClass("SInst");
Record *II = R.getClass("IInst");
@ -1358,7 +1362,7 @@ static std::string GenArgs(const std::string &proto, StringRef typestr,
}
s.push_back(arg);
// To avoid an argument being defined multiple times, append an extra number to rename it.
if (name == "vcopy_lane")
if (name == "vcopy_lane" || name == "vcopy_laneq")
s.push_back('1');
if ((i + 1) < e)
s += ", ";
@ -1383,7 +1387,7 @@ static std::string GenMacroLocals(const std::string &proto, StringRef typestr,
continue;
generatedLocal = true;
bool extranumber = false;
if(name == "vcopy_lane")
if (name == "vcopy_lane" || name == "vcopy_laneq")
extranumber = true;
s += TypeString(proto[i], typestr) + " __";
@ -1854,12 +1858,26 @@ static std::string GenOpString(const std::string &name, OpKind op,
MangleName(RemoveHigh(name), typestr, ClassS) + "(__b, __c));";
break;
}
case OpCopy: {
case OpCopyLane: {
s += TypeString('s', typestr) + " __c2 = " +
MangleName("vget_lane", typestr, ClassS) + "(__c1, __d1); \\\n " +
MangleName("vset_lane", typestr, ClassS) + "(__c2, __a1, __b1);";
break;
}
case OpCopyQLane: {
std::string typeCode = "";
InstructionTypeCode(typestr, ClassS, quad, typeCode);
s += TypeString('s', typestr) + " __c2 = vget_lane_" + typeCode +
"(__c1, __d1); \\\n vsetq_lane_" + typeCode + "(__c2, __a1, __b1);";
break;
}
case OpCopyLaneQ: {
std::string typeCode = "";
InstructionTypeCode(typestr, ClassS, quad, typeCode);
s += TypeString('s', typestr) + " __c2 = vgetq_lane_" + typeCode +
"(__c1, __d1); \\\n vset_lane_" + typeCode + "(__c2, __a1, __b1);";
break;
}
default:
PrintFatalError("unknown OpKind!");
}