Implement AArch64 Neon Crypto instruction classes AES, SHA, and 3 SHA.

llvm-svn: 194086
This commit is contained in:
Jiangning Liu 2013-11-05 17:42:24 +00:00
parent d7c52676f6
commit 34a7109b47
3 changed files with 181 additions and 30 deletions

View File

@ -768,6 +768,25 @@ def VADDV : SInst<"vaddv", "sd", "csiUcUsUiQcQsQiQUcQUsQUi">;
def FMAXNMV : SInst<"vmaxnmv", "sd", "Qf">;
def FMINNMV : SInst<"vminnmv", "sd", "Qf">;
////////////////////////////////////////////////////////////////////////////////
// Crypto
def AESE : SInst<"vaese", "ddd", "QUc">;
def AESD : SInst<"vaesd", "ddd", "QUc">;
def AESMC : SInst<"vaesmc", "dd", "QUc">;
def AESIMC : SInst<"vaesimc", "dd", "QUc">;
def SHA1H : SInst<"vsha1h", "ss", "Ui">;
def SHA1SU1 : SInst<"vsha1su1", "ddd", "QUi">;
def SHA256SU0 : SInst<"vsha256su0", "ddd", "QUi">;
def SHA1C : SInst<"vsha1c", "ddsd", "QUi">;
def SHA1P : SInst<"vsha1p", "ddsd", "QUi">;
def SHA1M : SInst<"vsha1m", "ddsd", "QUi">;
def SHA1SU0 : SInst<"vsha1su0", "dddd", "QUi">;
def SHA256H : SInst<"vsha256h", "dddd", "QUi">;
def SHA256H2 : SInst<"vsha256h2", "dddd", "QUi">;
def SHA256SU1 : SInst<"vsha256su1", "dddd", "QUi">;
////////////////////////////////////////////////////////////////////////////////
// Scalar Arithmetic

View File

@ -1770,6 +1770,45 @@ static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
// argument that specifies the vector type, need to handle each case.
switch (BuiltinID) {
default: break;
case AArch64::BI__builtin_neon_vget_lane_i8:
case AArch64::BI__builtin_neon_vget_lane_i16:
case AArch64::BI__builtin_neon_vget_lane_i32:
case AArch64::BI__builtin_neon_vget_lane_i64:
case AArch64::BI__builtin_neon_vget_lane_f32:
case AArch64::BI__builtin_neon_vget_lane_f64:
case AArch64::BI__builtin_neon_vgetq_lane_i8:
case AArch64::BI__builtin_neon_vgetq_lane_i16:
case AArch64::BI__builtin_neon_vgetq_lane_i32:
case AArch64::BI__builtin_neon_vgetq_lane_i64:
case AArch64::BI__builtin_neon_vgetq_lane_f32:
case AArch64::BI__builtin_neon_vgetq_lane_f64:
return CGF.EmitARMBuiltinExpr(ARM::BI__builtin_neon_vget_lane_i8, E);
case AArch64::BI__builtin_neon_vset_lane_i8:
case AArch64::BI__builtin_neon_vset_lane_i16:
case AArch64::BI__builtin_neon_vset_lane_i32:
case AArch64::BI__builtin_neon_vset_lane_i64:
case AArch64::BI__builtin_neon_vset_lane_f32:
case AArch64::BI__builtin_neon_vset_lane_f64:
case AArch64::BI__builtin_neon_vsetq_lane_i8:
case AArch64::BI__builtin_neon_vsetq_lane_i16:
case AArch64::BI__builtin_neon_vsetq_lane_i32:
case AArch64::BI__builtin_neon_vsetq_lane_i64:
case AArch64::BI__builtin_neon_vsetq_lane_f32:
case AArch64::BI__builtin_neon_vsetq_lane_f64:
return CGF.EmitARMBuiltinExpr(ARM::BI__builtin_neon_vset_lane_i8, E);
// Crypto
case AArch64::BI__builtin_neon_vsha1h_u32:
Int = Intrinsic::arm_neon_sha1h;
s = "sha1h"; OverloadInt = true; break;
case AArch64::BI__builtin_neon_vsha1cq_u32:
Int = Intrinsic::aarch64_neon_sha1c;
s = "sha1c"; break;
case AArch64::BI__builtin_neon_vsha1pq_u32:
Int = Intrinsic::aarch64_neon_sha1p;
s = "sha1p"; break;
case AArch64::BI__builtin_neon_vsha1mq_u32:
Int = Intrinsic::aarch64_neon_sha1m;
s = "sha1m"; break;
// Scalar Add
case AArch64::BI__builtin_neon_vaddd_s64:
Int = Intrinsic::aarch64_neon_vaddds;
@ -2434,36 +2473,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
Ops.push_back(EmitScalarExpr(E->getArg(i)));
}
// Some intrinsic isn't overloaded.
switch (BuiltinID) {
default: break;
case AArch64::BI__builtin_neon_vget_lane_i8:
case AArch64::BI__builtin_neon_vget_lane_i16:
case AArch64::BI__builtin_neon_vget_lane_i32:
case AArch64::BI__builtin_neon_vget_lane_i64:
case AArch64::BI__builtin_neon_vget_lane_f32:
case AArch64::BI__builtin_neon_vget_lane_f64:
case AArch64::BI__builtin_neon_vgetq_lane_i8:
case AArch64::BI__builtin_neon_vgetq_lane_i16:
case AArch64::BI__builtin_neon_vgetq_lane_i32:
case AArch64::BI__builtin_neon_vgetq_lane_i64:
case AArch64::BI__builtin_neon_vgetq_lane_f32:
case AArch64::BI__builtin_neon_vgetq_lane_f64:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vget_lane_i8, E);
case AArch64::BI__builtin_neon_vset_lane_i8:
case AArch64::BI__builtin_neon_vset_lane_i16:
case AArch64::BI__builtin_neon_vset_lane_i32:
case AArch64::BI__builtin_neon_vset_lane_i64:
case AArch64::BI__builtin_neon_vset_lane_f32:
case AArch64::BI__builtin_neon_vset_lane_f64:
case AArch64::BI__builtin_neon_vsetq_lane_i8:
case AArch64::BI__builtin_neon_vsetq_lane_i16:
case AArch64::BI__builtin_neon_vsetq_lane_i32:
case AArch64::BI__builtin_neon_vsetq_lane_i64:
case AArch64::BI__builtin_neon_vsetq_lane_f32:
case AArch64::BI__builtin_neon_vsetq_lane_f64:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vset_lane_i8, E);
}
// Get the last argument, which specifies the vector type.
llvm::APSInt Result;
@ -2769,6 +2778,38 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
case AArch64::BI__builtin_neon_vst4q_v:
return EmitARMBuiltinExpr(ARM::BI__builtin_neon_vst4q_v, E);
// Crypto
case AArch64::BI__builtin_neon_vaeseq_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aese, Ty),
Ops, "aese");
case AArch64::BI__builtin_neon_vaesdq_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aesd, Ty),
Ops, "aesd");
case AArch64::BI__builtin_neon_vaesmcq_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aesmc, Ty),
Ops, "aesmc");
case AArch64::BI__builtin_neon_vaesimcq_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_aesimc, Ty),
Ops, "aesimc");
case AArch64::BI__builtin_neon_vsha1su1q_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1su1, Ty),
Ops, "sha1su1");
case AArch64::BI__builtin_neon_vsha256su0q_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256su0, Ty),
Ops, "sha256su0");
case AArch64::BI__builtin_neon_vsha1su0q_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1su0, Ty),
Ops, "sha1su0");
case AArch64::BI__builtin_neon_vsha256hq_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256h, Ty),
Ops, "sha256h");
case AArch64::BI__builtin_neon_vsha256h2q_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256h2, Ty),
Ops, "sha256h2");
case AArch64::BI__builtin_neon_vsha256su1q_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256su1, Ty),
Ops, "sha256su1");
// AArch64-only builtins
case AArch64::BI__builtin_neon_vfma_lane_v:
case AArch64::BI__builtin_neon_vfmaq_laneq_v: {

View File

@ -0,0 +1,91 @@
// REQUIRES: aarch64-registered-target
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
// Test new aarch64 intrinsics and types
#include <arm_neon.h>
uint8x16_t test_vaeseq_u8(uint8x16_t data, uint8x16_t key) {
// CHECK: test_vaeseq_u8
return vaeseq_u8(data, key);
// CHECK: aese {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
uint8x16_t test_vaesdq_u8(uint8x16_t data, uint8x16_t key) {
// CHECK: test_vaesdq_u8
return vaesdq_u8(data, key);
// CHECK: aesd {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
uint8x16_t test_vaesmcq_u8(uint8x16_t data) {
// CHECK: test_vaesmcq_u8
return vaesmcq_u8(data);
// CHECK: aesmc {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
uint8x16_t test_vaesimcq_u8(uint8x16_t data) {
// CHECK: test_vaesimcq_u8
return vaesimcq_u8(data);
// CHECK: aesimc {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
}
uint32_t test_vsha1h_u32(uint32_t hash_e) {
// CHECK: test_vsha1h_u32
return vsha1h_u32(hash_e);
// CHECK: sha1h {{s[0-9]+}}, {{s[0-9]+}}
}
uint32x4_t test_vsha1su1q_u32(uint32x4_t tw0_3, uint32x4_t w12_15) {
// CHECK: test_vsha1su1q_u32
return vsha1su1q_u32(tw0_3, w12_15);
// CHECK: sha1su1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
uint32x4_t test_vsha256su0q_u32(uint32x4_t w0_3, uint32x4_t w4_7) {
// CHECK: test_vsha256su0q_u32
return vsha256su0q_u32(w0_3, w4_7);
// CHECK: sha256su0 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
uint32x4_t test_vsha1cq_u32(uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) {
// CHECK: test_vsha1cq_u32
return vsha1cq_u32(hash_abcd, hash_e, wk);
// CHECK: sha1c {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s
}
uint32x4_t test_vsha1pq_u32(uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) {
// CHECK: test_vsha1pq_u32
return vsha1pq_u32(hash_abcd, hash_e, wk);
// CHECK: sha1p {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s
}
uint32x4_t test_vsha1mq_u32(uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) {
// CHECK: test_vsha1mq_u32
return vsha1mq_u32(hash_abcd, hash_e, wk);
// CHECK: sha1m {{q[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.4s
}
uint32x4_t test_vsha1su0q_u32(uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11) {
// CHECK: test_vsha1su0q_u32
return vsha1su0q_u32(w0_3, w4_7, w8_11);
// CHECK: sha1su0 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}
uint32x4_t test_vsha256hq_u32(uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk) {
// CHECK: test_vsha256hq_u32
return vsha256hq_u32(hash_abcd, hash_efgh, wk);
// CHECK: sha256h {{q[0-9]+}}, {{q[0-9]+}}, {{v[0-9]+}}.4s
}
uint32x4_t test_vsha256h2q_u32(uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk) {
// CHECK: test_vsha256h2q_u32
return vsha256h2q_u32(hash_efgh, hash_abcd, wk);
// CHECK: sha256h2 {{q[0-9]+}}, {{q[0-9]+}}, {{v[0-9]+}}.4s
}
uint32x4_t test_vsha256su1q_u32(uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15) {
// CHECK: test_vsha256su1q_u32
return vsha256su1q_u32(tw0_3, w8_11, w12_15);
// CHECK: sha256su1 {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
}