[clang,ARM] Initial ACLE intrinsics for MVE.

This commit sets up the infrastructure for auto-generating <arm_mve.h>
and doing clang-side code generation for the builtins it relies on,
and demonstrates that it works by implementing a representative sample
of the ACLE intrinsics, more or less matching the ones introduced in
LLVM IR by D67158, D68699 and D68700.

Like NEON, that header file will provide a set of vector types like
uint16x8_t and C functions with names like vaddq_u32(). Unlike NEON,
the ACLE spec for <arm_mve.h> includes a polymorphism system, so that
you can write plain vaddq() and disambiguate by the vector types you
pass to it.
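
For example (a minimal usage sketch, assuming a target with MVE
enabled so that <arm_mve.h> is available):

  #include <arm_mve.h>

  uint32x4_t add_u32(uint32x4_t a, uint32x4_t b)
  {
      return vaddq(a, b);   /* resolves to vaddq_u32 */
  }

  float16x8_t add_f16(float16x8_t a, float16x8_t b)
  {
      return vaddq(a, b);   /* the same spelling resolves to vaddq_f16 */
  }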

Unlike the corresponding NEON code, I've arranged to make every
user-facing ACLE intrinsic into a clang builtin, and to implement all
the code generation inside clang. So <arm_mve.h> itself contains
nothing but typedefs and function declarations, with the latter all
using the new `__attribute__((__clang_arm_mve_alias))` system to
arrange that the user-facing function names correspond to the right
internal BuiltinIDs.
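
To illustrate the shape of those declarations, here is a hand-written
sketch of the style, not a verbatim excerpt from the generated header
(the builtin name shown is illustrative):

  #include <stdint.h>

  typedef __attribute__((__neon_vector_type__(4))) uint32_t uint32x4_t;

  static __inline__ __attribute__((__overloadable__,
      __clang_arm_mve_alias(__builtin_arm_mve_vaddq_u32)))
  uint32x4_t vaddq(uint32x4_t, uint32x4_t);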

So the new MveEmitter tablegen system specifies the full sequence of
IRBuilder operations that each user-facing ACLE intrinsic should
translate into. Where possible, the ACLE intrinsics map to standard IR
operations such as vector-typed `add` and `fadd`; where no standard
representation exists, I call down to the sample IR intrinsics
introduced in an earlier commit.

Doing it like this means that you get the polymorphism for free just
by using __attribute__((overloadable)): the clang overload resolution
decides which function declaration is the relevant one, and _then_ its
BuiltinID is looked up, so by the time we're doing code generation,
that's all been resolved by the standard system. It also means that
you get really nice error messages if the user passes the wrong
combination of types: clang will show the declarations from the header
file and explain why each one doesn't match.

(The obvious alternative approach would be to have wrapper functions
in <arm_mve.h> which pass their arguments to the underlying builtins.
But that doesn't work in the case where one of the arguments has to be
a constant integer: the wrapper function can't pass the constantness
through. So you'd have to do that case using a macro instead, and then
use C11 `_Generic` to handle the polymorphism. Then you have to add
horrible workarounds because `_Generic` requires even the untaken
branches to type-check successfully, and _then_ if the user gets the
types wrong, the error message is totally unreadable!)
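
For concreteness, here is a sketch of that rejected scheme (the macro
name my_vaddq is hypothetical; it dispatches to the ordinary
non-polymorphic functions):

  #include <arm_mve.h>

  #define my_vaddq(a, b)                  \
      _Generic((a),                       \
          uint32x4_t:  vaddq_u32,         \
          float32x4_t: vaddq_f32)(a, b)

This much works, but only because each branch merely names a function;
as soon as different branches have to massage their arguments in
different ways, the requirement that every branch type-check makes the
scheme fall apart.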

Reviewers: dmgreen, miyuki, ostannard

Subscribers: mgorny, javed.absar, kristof.beyls, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D67161
Author: Simon Tatham
Date:   2019-09-02 15:50:50 +01:00
Parent: 7c11da0cfd
Commit: 08074cc965

23 changed files with 2890 additions and 11 deletions

@@ -189,6 +189,11 @@ BUILTIN(__builtin_arm_wsr, "vcC*Ui", "nc")
BUILTIN(__builtin_arm_wsr64, "vcC*LLUi", "nc")
BUILTIN(__builtin_arm_wsrp, "vcC*vC*", "nc")
// Builtins for implementing ACLE MVE intrinsics. (Unlike NEON, these
// don't need to live in a separate BuiltinsMVE.def, because they
// aren't included from both here and BuiltinsAArch64.def.)
#include "clang/Basic/arm_mve_builtins.inc"
// MSVC
LANGBUILTIN(__emit, "vIUiC", "", ALL_MS_LANGUAGES)

@@ -41,10 +41,22 @@ clang_tablegen(AttrHasAttributeImpl.inc -gen-clang-attr-has-attribute-impl
TARGET ClangAttrHasAttributeImpl
)
-# ARM NEON
+# ARM NEON and MVE
clang_tablegen(arm_neon.inc -gen-arm-neon-sema
SOURCE arm_neon.td
TARGET ClangARMNeon)
clang_tablegen(arm_fp16.inc -gen-arm-neon-sema
SOURCE arm_fp16.td
TARGET ClangARMFP16)
clang_tablegen(arm_mve_builtins.inc -gen-arm-mve-builtin-def
SOURCE arm_mve.td
TARGET ClangARMMveBuiltinsDef)
clang_tablegen(arm_mve_builtin_cg.inc -gen-arm-mve-builtin-codegen
SOURCE arm_mve.td
TARGET ClangARMMveBuiltinCG)
clang_tablegen(arm_mve_builtin_sema.inc -gen-arm-mve-builtin-sema
SOURCE arm_mve.td
TARGET ClangARMMveBuiltinSema)
clang_tablegen(arm_mve_builtin_aliases.inc -gen-arm-mve-builtin-aliases
SOURCE arm_mve.td
TARGET ClangARMMveBuiltinAliases)

@@ -8662,6 +8662,12 @@ def warn_argument_invalid_range : Warning<
InGroup<DiagGroup<"argument-outside-range">>;
def err_argument_not_multiple : Error<
"argument should be a multiple of %0">;
def err_argument_not_power_of_2 : Error<
"argument should be a power of 2">;
def err_argument_not_shifted_byte : Error<
"argument should be an 8-bit value shifted by a multiple of 8 bits">;
def err_argument_not_shifted_byte_or_xxff : Error<
"argument should be an 8-bit value shifted by a multiple of 8 bits, or in the form 0x??FF">;
def warn_neon_vector_initializer_non_portable : Warning<
"vector initializers are not compatible with NEON intrinsics in big endian "
"mode">, InGroup<DiagGroup<"nonportable-vector-initialization">>;

@@ -0,0 +1,121 @@
//===- arm_mve.td - ACLE intrinsic functions for MVE architecture ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the set of ACLE-specified source-level intrinsic
// functions wrapping the MVE vector instruction set and scalar shift
// operations.
//
// Refer to comments in arm_mve_defs.td for the infrastructure used in
// here, and to MveEmitter.cpp for how those are used in turn to
// generate code.
//
//===----------------------------------------------------------------------===//
include "arm_mve_defs.td"
let params = T.All in
foreach n = [ 2, 4 ] in {
def "vst"#n#"q": Intrinsic<Void, (args Ptr<Scalar>, MultiVector<n>),
(CustomCodegen<"VST24"> n:$NumVectors,
"Intrinsic::arm_mve_vst"#n#"q":$IRIntr)>;
def "vld"#n#"q": Intrinsic<MultiVector<n>, (args CPtr<Scalar>),
(CustomCodegen<"VLD24"> n:$NumVectors,
"Intrinsic::arm_mve_vld"#n#"q":$IRIntr)>;
}
let params = T.Int in {
def vaddq: Intrinsic<Vector, (args Vector:$a, Vector:$b), (add $a, $b)>;
def vsubq: Intrinsic<Vector, (args Vector:$a, Vector:$b), (sub $a, $b)>;
}
let params = T.Float in {
def vaddqf: Intrinsic<Vector, (args Vector:$a, Vector:$b), (fadd $a, $b)>,
NameOverride<"vaddq">;
def vsubqf: Intrinsic<Vector, (args Vector:$a, Vector:$b), (fsub $a, $b)>,
NameOverride<"vsubq">;
}
let params = T.Usual in {
def vaddq_m: Intrinsic<
Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred),
(IRInt<"add_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>;
def vsubq_m: Intrinsic<
Vector, (args Vector:$inactive, Vector:$a, Vector:$b, Predicate:$pred),
(IRInt<"sub_predicated", [Vector, Predicate]> $a, $b, $pred, $inactive)>;
}
let params = T.Int in {
def vminvq: Intrinsic<Scalar, (args Scalar:$prev, Vector:$vec),
(Scalar (IRInt<"minv", [Vector], 1> $prev, $vec))>;
def vmaxvq: Intrinsic<Scalar, (args Scalar:$prev, Vector:$vec),
(Scalar (IRInt<"maxv", [Vector], 1> $prev, $vec))>;
}
foreach half = [ "b", "t" ] in
foreach halfconst = [ !if(!eq(half, "b"), 0, 1) ] in {
let params = [f32], pnt = PNT_None in {
def vcvt#half#q_f16: Intrinsic<
VecOf<f16>, (args VecOf<f16>:$inactive, Vector:$a),
(IRInt<"vcvt_narrow"> $inactive, $a, halfconst)>;
def vcvt#half#q_m_f16: Intrinsic<
VecOf<f16>, (args VecOf<f16>:$inactive, Vector:$a, PredOf<f32>:$pred),
(IRInt<"vcvt_narrow_predicated"> $inactive, $a, halfconst, $pred)>;
} // params = [f32], pnt = PNT_None
} // loop over half = "b", "t"
let params = T.All32, pnt = PNT_None in
def vldrwq_gather_base_wb: Intrinsic<
Vector, (args Ptr<VecOf<Unsigned<Scalar>>>:$addr, imm_mem7bit<4>:$offset),
(seq (IRInt<"vldr_gather_base_wb", [Vector, VecOf<Unsigned<Scalar>>]>
(load $addr), $offset):$pair,
(store (xval $pair, 1), $addr),
(xval $pair, 0))>;
let params = T.All64, pnt = PNT_None in
def vldrdq_gather_base_wb_z: Intrinsic<
Vector, (args Ptr<VecOf<Unsigned<Scalar>>>:$addr, imm_mem7bit<8>:$offset,
Predicate:$pred),
(seq (IRInt<"vldr_gather_base_wb_predicated", [Vector, VecOf<Unsigned<Scalar>>, Predicate]>
(load $addr), $offset, $pred):$pair,
(store (xval $pair, 1), $addr),
(xval $pair, 0))>;
let params = [Void], pnt = PNT_None in
def urshrl: Intrinsic<u64, (args u64:$value, imm_1to32:$shift),
(seq (u32 (lshr $value, (u64 32))):$hi,
(u32 $value):$lo,
(IRInt<"urshrl"> $lo, $hi, $shift):$pair,
(or (shl (u64 (xval $pair, 1)), (u64 32)),
(u64 (xval $pair, 0))))>;
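// Note: the vadcq family below models the carry flag as bit 29 of the
// i32 value that the underlying IR intrinsic consumes and returns,
// matching the C flag's position in FPSCR; hence the shifts by 29.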
let params = T.Int32 in {
def vadcq: Intrinsic<Vector, (args Vector:$a, Vector:$b, Ptr<u32>:$carry),
(seq (IRInt<"vadc", [Vector]> $a, $b, (shl (load $carry), 29)):$pair,
(store (and 1, (lshr (xval $pair, 1), 29)), $carry),
(xval $pair, 0))>;
def vadciq: Intrinsic<Vector, (args Vector:$a, Vector:$b, Ptr<u32>:$carry),
(seq (IRInt<"vadc", [Vector]> $a, $b, 0):$pair,
(store (and 1, (lshr (xval $pair, 1), 29)), $carry),
(xval $pair, 0))>;
def vadcq_m: Intrinsic<Vector, (args Vector:$inactive, Vector:$a, Vector:$b,
Ptr<u32>:$carry, Predicate:$pred),
(seq (IRInt<"vadc_predicated", [Vector, Predicate]> $inactive, $a, $b,
(shl (load $carry), 29), $pred):$pair,
(store (and 1, (lshr (xval $pair, 1), 29)), $carry),
(xval $pair, 0))>;
def vadciq_m: Intrinsic<Vector, (args Vector:$inactive, Vector:$a, Vector:$b,
Ptr<u32>:$carry, Predicate:$pred),
(seq (IRInt<"vadc_predicated", [Vector, Predicate]> $inactive, $a, $b,
0, $pred):$pair,
(store (and 1, (lshr (xval $pair, 1), 29)), $carry),
(xval $pair, 0))>;
}

@@ -0,0 +1,325 @@
//===- arm_mve_defs.td - definitions and infrastructure for arm_mve.td ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The definitions in this file are designed to work in close conjunction with
// clang/utils/TableGen/MveEmitter.cpp. Comments in there will probably be
// useful as well.
//
//===----------------------------------------------------------------------===//
// -----------------------------------------------------------------------------
// Forward declarations.
class Type;
// -----------------------------------------------------------------------------
// Dummy record used as the dag operator for the argument list of an intrinsic.
//
// We store arguments as a dag rather than a list<Type> so that we can give
// each one a name, to be used in codegen. For example, (args Vector:$a,
// Scalar:$b) defines the names $a and $b which the specification of the code
// for that intrinsic can refer to.
def args;
// -----------------------------------------------------------------------------
// Family of nodes for use in the codegen dag for an intrinsic, corresponding
// roughly to operations in LLVM IR. More precisely, they correspond to calls
// to methods of llvm::IRBuilder.
class IRBuilder<string func_> {
string func = func_; // the method name
list<int> address_params = []; // indices of parameters with type Address
list<int> int_constant_params = []; // indices of plain integer parameters
}
def add: IRBuilder<"CreateAdd">;
def or: IRBuilder<"CreateOr">;
def and: IRBuilder<"CreateAnd">;
def sub: IRBuilder<"CreateSub">;
def shl: IRBuilder<"CreateShl">;
def lshr: IRBuilder<"CreateLShr">;
def fadd: IRBuilder<"CreateFAdd">;
def fsub: IRBuilder<"CreateFSub">;
def load: IRBuilder<"CreateLoad"> { let address_params = [0]; }
def store: IRBuilder<"CreateStore"> { let address_params = [1]; }
def xval: IRBuilder<"CreateExtractValue"> { let int_constant_params = [1]; }
// Another node class you can use in the codegen dag. This one corresponds to
// an IR intrinsic function, which has to be specialized to a particular list
// of types.
class IRInt<string name_, list<Type> params_ = [], bit appendKind_ = 0> {
string intname = name_; // base name of the intrinsic, minus "arm_mve_"
list<Type> params = params_; // list of parameter types
// If this flag is set, then the IR intrinsic name will get a suffix _s, _u
// or _f depending on whether the main parameter type of the ACLE intrinsic
// being generated is a signed integer, unsigned integer, or float. Mostly
// this is useful for signed vs unsigned integers, because the ACLE
// intrinsics and the source-level integer types distinguish them, but at IR
// level the distinction has moved from the type system into the operations
// and you just have i32 or i16 etc. So when an IR intrinsic has to vary with
// signedness, you set this bit, and then you can still put the signed and
// unsigned versions in the same subclass of Intrinsic, and the Tablegen
// backend will take care of adding _s or _u as appropriate in each instance.
bit appendKind = appendKind_;
}
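// For example, arm_mve.td above instantiates IRInt<"minv", [Vector], 1>
// for vminvq, which expands to llvm.arm.mve.minv.s.* for the
// signed-integer instances and llvm.arm.mve.minv.u.* for the unsigned
// ones, as the vminvq codegen tests below demonstrate.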
// The 'seq' node in a codegen dag specifies a set of IR operations to be
// performed in order. It has the special ability to define extra variable
// names, on top of the ones that refer to the intrinsic's parameters. For
// example:
//
// (seq (foo this, that):$a,
// (bar this, $a):$b
// (add $a, $b))
//
// defines the name $a to refer to the return value of the 'foo' operation;
// then the 'bar' operation uses $a as one of its arguments, and the return
// value of that is assigned the name $b; finally, $a and $b are added to give
// the return value of the seq construction as a whole.
def seq;
// If you put CustomCodegen<"foo"> in an intrinsic's codegen field, it
// indicates that the IR generation for that intrinsic is done by handwritten
// C++ and not autogenerated at all. The effect in the MVE builtin codegen
// function is to break out of the main switch and fall through to the
// manual-codegen cases below it, having set the CustomCodeGenType enumerated
// variable to the value given by the 'type' string here.
class CustomCodegen<string type_> { string type = type_; }
// -----------------------------------------------------------------------------
// System for building up complex instances of Type from simple ones.
// ComplexType is used to represent any more complicated type: vectors,
// multivectors, pointers etc. Its dag argument specifies how the type should
// be constructed from simpler types. The operator of the dag will always be an
// instance of ComplexTypeOp, defined below.
class ComplexType<dag spec_>: Type { dag spec = spec_; }
// Operators you can use in the ComplexType spec dag. These are an intermediate
// layer, interpreted by MveEmitter::getType() in the Tablegen backend, and
// only used in the definitions below. Actual intrinsic definitions in
// arm_mve.td will use the defs defined below here.
class ComplexTypeOp;
def CTO_Parameter: ComplexTypeOp;
def CTO_Vec: ComplexTypeOp;
def CTO_Pred: ComplexTypeOp;
class CTO_Tuple<int n_>: ComplexTypeOp { int n = n_; }
class CTO_Pointer<bit const_>: ComplexTypeOp { bit const = const_; }
class CTO_Sign<bit signed_>: ComplexTypeOp { bit signed = signed_; }
// -----------------------------------------------------------------------------
// Instances of Type intended to be used directly in the specification of an
// intrinsic in arm_mve.td.
// The type Void can be used for the return type of an intrinsic, and as the
// parameter type for intrinsics that aren't actually parameterised by any kind
// of _s32 / _f16 / _u8 suffix.
def Void : Type;
// Primitive types: base class, and an instance for the set of scalar integer
// and floating types that MVE uses.
class PrimitiveType<string kind_, int size_>: Type {
string kind = kind_;
int size = size_;
}
// The type records defined by these foreaches have names like s32, f16, u8.
foreach size = [8, 16, 32, 64] in
foreach kind = ["u", "s"] in
def kind # size: PrimitiveType<kind, size>;
foreach size = [16, 32] in
foreach kind = ["f"] in
def kind # size: PrimitiveType<kind, size>;
// VecOf<t> expects t to be a scalar, and gives a 128-bit vector of whatever it
// is.
class VecOf<Type t>: ComplexType<(CTO_Vec t)>;
// PredOf expects t to be a scalar, and expands to a predicate vector which
// (logically speaking) has the same number of lanes as VecOf<t> would.
class PredOf<Type t>: ComplexType<(CTO_Pred t)>;
// Scalar expands to whatever is the main parameter type of the current
// intrinsic. Vector and Predicate expand to the vector and predicate types
// corresponding to that.
def Scalar: ComplexType<(CTO_Parameter)>;
def Vector: VecOf<Scalar>;
def Predicate: PredOf<Scalar>;
// MultiVector<n> expands to a type containing n instances of Vector. (There's
// no need to define this for a general underlying vector type, since it's only
// used by vld2q and friends, which don't need that generality.)
class MultiVector<int n>: ComplexType<(CTO_Tuple<n> Vector)>;
// Ptr<t> and CPtr<t> expand to a pointer to t, or a pointer to const t,
// respectively.
class Ptr<Type t>: ComplexType<(CTO_Pointer<0> t)>;
class CPtr<Type t>: ComplexType<(CTO_Pointer<1> t)>;
// Unsigned<t> expects t to be a scalar, and expands to the unsigned integer
// scalar of the same size. So it returns u16 if you give it s16 or f16 (or
// u16 itself).
class Unsigned<Type t>: ComplexType<(CTO_Sign<0> t)>;
// -----------------------------------------------------------------------------
// Internal definitions for specifying immediate arguments for an intrinsic.
class ImmediateBounds;
class Immediate<Type type_, ImmediateBounds bounds_>: Type {
Type type = type_;
ImmediateBounds bounds = bounds_;
string extra;
string extraarg;
}
class IB_ConstRange<int lo_, int hi_> : ImmediateBounds {
int lo = lo_;
int hi = hi_;
}
def IB_UEltValue : ImmediateBounds;
def IB_LaneIndex : ImmediateBounds;
class IB_EltBit<int base_> : ImmediateBounds { int base = base_; }
// -----------------------------------------------------------------------------
// End-user definitions for immediate arguments.
// imm_simd_restrictive and imm_simd_vmvn are used for the immediate operands
// to intrinsics like vmvnq or vorrq. imm_simd_restrictive has to be an 8-bit
// value shifted left by a whole number of bytes; imm_simd_vmvn can also be of
// the form 0xXXFF for some byte value XX.
def imm_simd_restrictive : Immediate<u32, IB_UEltValue> {
let extra = "ShiftedByte";
}
def imm_simd_vmvn : Immediate<u32, IB_UEltValue> {
let extra = "ShiftedByteOrXXFF";
}
// imm_1toN can take any value from 1 to N inclusive, where N is the number of
// bits in the main parameter type. (E.g. an immediate shift count, in an
// intrinsic that shifts every lane of a vector by the same amount.)
//
// imm_0toNm1 is the same but with the range offset by 1, i.e. 0 to N-1
// inclusive.
def imm_1toN : Immediate<u32, IB_EltBit<1>>;
def imm_0toNm1 : Immediate<u32, IB_EltBit<0>>;
// imm_lane has to be the index of a vector lane in the main vector type, i.e.
// it can range from 0 to (128 / size of scalar)-1 inclusive. (e.g. vgetq_lane)
def imm_lane : Immediate<u32, IB_LaneIndex>;
// imm_1to32 can be in the range 1 to 32, unconditionally. (e.g. scalar shift
// intrinsics)
def imm_1to32 : Immediate<u32, IB_ConstRange<1, 32>>;
// imm_1248 can be 1, 2, 4 or 8. (e.g. vidupq)
def imm_1248 : Immediate<u32, IB_ConstRange<1, 8>> {
let extra = "Power2";
}
// imm_mem7bit<n> is a valid immediate offset for a load/store intrinsic whose
// memory access size is n bytes (e.g. 1 for vldrb_[whatever], 2 for vldrh,
// ...). The set of valid immediates for these is {0*n, 1*n, ..., 127*n}.
class imm_mem7bit<int membytes>
: Immediate<u32, IB_ConstRange<0, !mul(membytes, 127)>> {
let extra = !if(!eq(membytes, 1), ?, "Multiple");
let extraarg = !cast<string>(membytes);
}
// -----------------------------------------------------------------------------
// Specification of ways that the full name of an intrinsic can be mapped to
// its shorter polymorphic name.
class PolymorphicNameType<int nt_, string x_> {
int NumTypeSuffixesToDiscard = nt_;
string ExtraSuffixToDiscard = x_;
}
// PNT_None: the intrinsic is not polymorphic at all, so its short name is the
// same as its long name. (E.g. scalar shift intrinsics such as uqshl.)
def PNT_None: PolymorphicNameType<0, ?>;
// PNT_Type: the usual case, in which the polymorphic name is made by dropping
// the type suffix, so it ends up the same as the Tablegen record name. E.g.
// vaddq_u16 -> vaddq.
def PNT_Type: PolymorphicNameType<1, ?>;
// PNT_2Type: the polymorphic name is made by dropping _two_ type suffixes.
// E.g. vcvtq_f16_u16 -> vcvtq.
def PNT_2Type: PolymorphicNameType<2, ?>;
// PNT_NType: the polymorphic name is made by dropping an "_n" suffix and a
// type. E.g. vaddq_n_u16 -> vaddq.
def PNT_NType: PolymorphicNameType<1, "n">;
// PNT_N: the polymorphic name is made by just dropping an "_n" suffix
// (even if it isn't at the end of the name). E.g. vidupq_n_u16 -> vidupq_u16.
def PNT_N: PolymorphicNameType<0, "n">;
// PNT_WBType: the polymorphic name is made by dropping an "_wb" suffix and a
// type. E.g. vidupq_m_wb_u16 -> vidupq_m.
def PNT_WBType: PolymorphicNameType<1, "wb">;
// PNT_WB: the polymorphic name is made by just dropping "_wb". E.g.
// vidupq_wb_u16 -> vidupq_u16.
def PNT_WB: PolymorphicNameType<0, "wb">;
// -----------------------------------------------------------------------------
// The main class Intrinsic. Define one of these for each family of ACLE
// intrinsics which are the same apart from some final type suffix (e.g.
// vaddq_{s8,u8,f16,...}).
//
// The record's name plus that type suffix is taken to be the full unambiguous
// name of the function. Its shorter polymorphic name is constructed from that
// in turn, in a way specified by the PolymorphicNameType system above.
class Intrinsic<Type ret_, dag args_, dag codegen_> {
// List of parameter types to suffix to this intrinsic's name. A separate
// actual ACLE intrinsic will be generated for each of these. Set it to
// [Void] if the intrinsic is not polymorphic at all.
list<Type> params;
// Return type and arguments for the intrinsic.
Type ret = ret_;
dag args = args_;
// Specification of how to generate its IR.
dag codegen = codegen_;
// Default to PNT_Type, which is by far the most common case.
PolymorphicNameType pnt = PNT_Type;
}
// Sometimes you have to use two separate Intrinsic declarations to
// declare intrinsics that are logically the same family (e.g. vaddq,
// because it needs to expand to an Add or FAdd IR node depending on
// type). For that purpose, you can derive from NameOverride to
// specify the intrinsic's base name independently of the Tablegen
// record name.
class NameOverride<string basename_> {
string basename = basename_;
}
// -----------------------------------------------------------------------------
// Convenience lists of parameter types. 'T' is just a container record, so you
// can define a typical intrinsic with 'let params = T.Usual', or similar,
// instead of having to repeat a long list every time.
def T {
list<Type> Signed = [s8, s16, s32];
list<Type> Unsigned = [u8, u16, u32];
list<Type> Int = Signed # Unsigned;
list<Type> Float = [f16, f32];
list<Type> Usual = Int # Float;
list<Type> Int8 = [s8, u8];
list<Type> Int16 = [s16, u16];
list<Type> Int32 = [s32, u32];
list<Type> Int64 = [s64, u64];
list<Type> All8 = Int8;
list<Type> All16 = Int16 # [f16];
list<Type> All32 = Int32 # [f32];
list<Type> All64 = Int64;
list<Type> All = Usual # All64;
}

@@ -11270,6 +11270,7 @@ private:
bool CheckARMBuiltinExclusiveCall(unsigned BuiltinID, CallExpr *TheCall,
unsigned MaxWidth);
bool CheckNeonBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
bool CheckMVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
bool CheckARMBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
bool CheckAArch64BuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
@@ -11317,6 +11318,9 @@ private:
int High, bool RangeIsError = true);
bool SemaBuiltinConstantArgMultiple(CallExpr *TheCall, int ArgNum,
unsigned Multiple);
bool SemaBuiltinConstantArgPower2(CallExpr *TheCall, int ArgNum);
bool SemaBuiltinConstantArgShiftedByte(CallExpr *TheCall, int ArgNum);
bool SemaBuiltinConstantArgShiftedByteOrXXFF(CallExpr *TheCall, int ArgNum);
bool SemaBuiltinARMSpecialReg(unsigned BuiltinID, CallExpr *TheCall,
int ArgNum, unsigned ExpectedFieldNum,
bool AllowName);

@@ -4218,7 +4218,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
}
// See if we have a target specific builtin that needs to be lowered.
-if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
+if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue))
return RValue::get(V);
ErrorUnsupported(E, "builtin function");
@@ -4229,13 +4229,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
unsigned BuiltinID, const CallExpr *E,
ReturnValueSlot ReturnValue,
llvm::Triple::ArchType Arch) {
switch (Arch) {
case llvm::Triple::arm:
case llvm::Triple::armeb:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
-return CGF->EmitARMBuiltinExpr(BuiltinID, E, Arch);
+return CGF->EmitARMBuiltinExpr(BuiltinID, E, ReturnValue, Arch);
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_be:
return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
@@ -4268,15 +4269,16 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
}
Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
-const CallExpr *E) {
+const CallExpr *E,
+ReturnValueSlot ReturnValue) {
if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
assert(getContext().getAuxTargetInfo() && "Missing aux target info");
return EmitTargetArchBuiltinExpr(
this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
-getContext().getAuxTargetInfo()->getTriple().getArch());
+ReturnValue, getContext().getAuxTargetInfo()->getTriple().getArch());
}
-return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
+return EmitTargetArchBuiltinExpr(this, BuiltinID, E, ReturnValue,
getTarget().getTriple().getArch());
}
@@ -6004,6 +6006,7 @@ static bool HasExtraNeonArgument(unsigned BuiltinID) {
Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
const CallExpr *E,
ReturnValueSlot ReturnValue,
llvm::Triple::ArchType Arch) {
if (auto Hint = GetValueForARMHint(BuiltinID))
return Hint;
@@ -6320,6 +6323,10 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
}
// Deal with MVE builtins
if (Value *Result = EmitARMMVEBuiltinExpr(BuiltinID, E, ReturnValue, Arch))
return Result;
// Find out if any arguments are required to be integer constant
// expressions.
unsigned ICEArguments = 0;
@@ -6769,6 +6776,109 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
}
}
Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
const CallExpr *E,
ReturnValueSlot ReturnValue,
llvm::Triple::ArchType Arch) {
enum class CustomCodeGen { VLD24, VST24 } CustomCodeGenType;
Intrinsic::ID IRIntr;
unsigned NumVectors;
// Code autogenerated by Tablegen will handle all the simple builtins.
switch (BuiltinID) {
#include "clang/Basic/arm_mve_builtin_cg.inc"
// If we didn't match an MVE builtin id at all, go back to the
// main EmitARMBuiltinExpr.
default:
return nullptr;
}
// Anything that breaks from that switch is an MVE builtin that
// needs handwritten code to generate.
switch (CustomCodeGenType) {
case CustomCodeGen::VLD24: {
llvm::SmallVector<Value *, 4> Ops;
llvm::SmallVector<llvm::Type *, 4> Tys;
auto MvecCType = E->getType();
auto MvecLType = ConvertType(MvecCType);
assert(MvecLType->isStructTy() &&
"Return type for vld[24]q should be a struct");
assert(MvecLType->getStructNumElements() == 1 &&
"Return-type struct for vld[24]q should have one element");
auto MvecLTypeInner = MvecLType->getStructElementType(0);
assert(MvecLTypeInner->isArrayTy() &&
"Return-type struct for vld[24]q should contain an array");
assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
"Array member of return-type struct vld[24]q has wrong length");
auto VecLType = MvecLTypeInner->getArrayElementType();
Tys.push_back(VecLType);
auto Addr = E->getArg(0);
Ops.push_back(EmitScalarExpr(Addr));
Tys.push_back(ConvertType(Addr->getType()));
Function *F = CGM.getIntrinsic(IRIntr, makeArrayRef(Tys));
Value *LoadResult = Builder.CreateCall(F, Ops);
Value *MvecOut = UndefValue::get(MvecLType);
for (unsigned i = 0; i < NumVectors; ++i) {
Value *Vec = Builder.CreateExtractValue(LoadResult, i);
MvecOut = Builder.CreateInsertValue(MvecOut, Vec, {0, i});
}
if (ReturnValue.isNull())
return MvecOut;
else
return Builder.CreateStore(MvecOut, ReturnValue.getValue());
}
case CustomCodeGen::VST24: {
llvm::SmallVector<Value *, 4> Ops;
llvm::SmallVector<llvm::Type *, 4> Tys;
auto Addr = E->getArg(0);
Ops.push_back(EmitScalarExpr(Addr));
Tys.push_back(ConvertType(Addr->getType()));
auto MvecCType = E->getArg(1)->getType();
auto MvecLType = ConvertType(MvecCType);
assert(MvecLType->isStructTy() && "Data type for vst2q should be a struct");
assert(MvecLType->getStructNumElements() == 1 &&
"Data-type struct for vst2q should have one element");
auto MvecLTypeInner = MvecLType->getStructElementType(0);
assert(MvecLTypeInner->isArrayTy() &&
"Data-type struct for vst2q should contain an array");
assert(MvecLTypeInner->getArrayNumElements() == NumVectors &&
"Array member of return-type struct vld[24]q has wrong length");
auto VecLType = MvecLTypeInner->getArrayElementType();
Tys.push_back(VecLType);
AggValueSlot MvecSlot = CreateAggTemp(MvecCType);
EmitAggExpr(E->getArg(1), MvecSlot);
auto Mvec = Builder.CreateLoad(MvecSlot.getAddress());
for (unsigned i = 0; i < NumVectors; i++)
Ops.push_back(Builder.CreateExtractValue(Mvec, {0, i}));
Function *F = CGM.getIntrinsic(IRIntr, makeArrayRef(Tys));
Value *ToReturn = nullptr;
for (unsigned i = 0; i < NumVectors; i++) {
Ops.push_back(llvm::ConstantInt::get(Int32Ty, i));
ToReturn = Builder.CreateCall(F, Ops);
Ops.pop_back();
}
return ToReturn;
}
default:
llvm_unreachable("bad CustomCodegen enum value");
}
}
static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
const CallExpr *E,
SmallVectorImpl<Value *> &Ops,

@@ -3726,14 +3726,19 @@ public:
/// EmitTargetBuiltinExpr - Emit the given builtin call. Returns 0 if the call
/// is unhandled by the current target.
-llvm::Value *EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+llvm::Value *EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
+ReturnValueSlot ReturnValue);
llvm::Value *EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty,
const llvm::CmpInst::Predicate Fp,
const llvm::CmpInst::Predicate Ip,
const llvm::Twine &Name = "");
llvm::Value *EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
ReturnValueSlot ReturnValue,
llvm::Triple::ArchType Arch);
llvm::Value *EmitARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
ReturnValueSlot ReturnValue,
llvm::Triple::ArchType Arch);
llvm::Value *EmitCommonNeonBuiltinExpr(unsigned BuiltinID,
unsigned LLVMIntrinsic,

@@ -182,6 +182,8 @@ endforeach( f )
clang_generate_header(-gen-arm-neon arm_neon.td arm_neon.h)
# Generate arm_fp16.h
clang_generate_header(-gen-arm-fp16 arm_fp16.td arm_fp16.h)
# Generate arm_mve.h
clang_generate_header(-gen-arm-mve-header arm_mve.td arm_mve.h)
add_custom_target(clang-resource-headers ALL DEPENDS ${out_files})
set_target_properties(clang-resource-headers PROPERTIES

@@ -1717,6 +1717,14 @@ bool Sema::CheckNeonBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
return SemaBuiltinConstantArgRange(TheCall, i, l, u + l);
}
bool Sema::CheckMVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
switch (BuiltinID) {
default:
return false;
#include "clang/Basic/arm_mve_builtin_sema.inc"
}
}
bool Sema::CheckARMBuiltinExclusiveCall(unsigned BuiltinID, CallExpr *TheCall,
unsigned MaxWidth) {
assert((BuiltinID == ARM::BI__builtin_arm_ldrex ||
@@ -1857,6 +1865,8 @@ bool Sema::CheckARMBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
if (CheckNeonBuiltinFunctionCall(BuiltinID, TheCall))
return true;
if (CheckMVEBuiltinFunctionCall(BuiltinID, TheCall))
return true;
// For intrinsics which take an immediate value as part of the instruction,
// range check them here.
@@ -6235,6 +6245,101 @@ bool Sema::SemaBuiltinConstantArgMultiple(CallExpr *TheCall, int ArgNum,
return false;
}
/// SemaBuiltinConstantArgPower2 - Check if argument ArgNum of TheCall is a
/// constant expression representing a power of 2.
bool Sema::SemaBuiltinConstantArgPower2(CallExpr *TheCall, int ArgNum) {
llvm::APSInt Result;
// We can't check the value of a dependent argument.
Expr *Arg = TheCall->getArg(ArgNum);
if (Arg->isTypeDependent() || Arg->isValueDependent())
return false;
// Check constant-ness first.
if (SemaBuiltinConstantArg(TheCall, ArgNum, Result))
return true;
// Bit-twiddling to test for a power of 2: for x > 0, x & (x-1) is zero if
// and only if x is a power of 2.
if (Result.isStrictlyPositive() && (Result & (Result - 1)) == 0)
return false;
return Diag(TheCall->getBeginLoc(), diag::err_argument_not_power_of_2)
<< Arg->getSourceRange();
}
static bool IsShiftedByte(llvm::APSInt Value) {
if (Value.isNegative())
return false;
// Check if it's a shifted byte, by shifting it down
while (true) {
// If the value fits in the bottom byte, the check passes.
if (Value < 0x100)
return true;
// Otherwise, if the value has _any_ bits in the bottom byte, the check
// fails.
if ((Value & 0xFF) != 0)
return false;
// If the bottom 8 bits are all 0, but something above that is nonzero,
// then shifting the value right by 8 bits won't affect whether it's a
// shifted byte or not. So do that, and go round again.
Value >>= 8;
}
}
/// SemaBuiltinConstantArgShiftedByte - Check if argument ArgNum of TheCall is
/// a constant expression representing an arbitrary byte value shifted left by
/// a multiple of 8 bits.
bool Sema::SemaBuiltinConstantArgShiftedByte(CallExpr *TheCall, int ArgNum) {
llvm::APSInt Result;
// We can't check the value of a dependent argument.
Expr *Arg = TheCall->getArg(ArgNum);
if (Arg->isTypeDependent() || Arg->isValueDependent())
return false;
// Check constant-ness first.
if (SemaBuiltinConstantArg(TheCall, ArgNum, Result))
return true;
if (IsShiftedByte(Result))
return false;
return Diag(TheCall->getBeginLoc(), diag::err_argument_not_shifted_byte)
<< Arg->getSourceRange();
}
/// SemaBuiltinConstantArgShiftedByteOrXXFF - Check if argument ArgNum of
/// TheCall is a constant expression representing either a shifted byte value,
/// or a value of the form 0x??FF (i.e. a member of the arithmetic progression
/// 0x00FF, 0x01FF, ..., 0xFFFF). This strange range check is needed for some
/// Arm MVE intrinsics.
bool Sema::SemaBuiltinConstantArgShiftedByteOrXXFF(CallExpr *TheCall,
int ArgNum) {
llvm::APSInt Result;
// We can't check the value of a dependent argument.
Expr *Arg = TheCall->getArg(ArgNum);
if (Arg->isTypeDependent() || Arg->isValueDependent())
return false;
// Check constant-ness first.
if (SemaBuiltinConstantArg(TheCall, ArgNum, Result))
return true;
// Check to see if it's in either of the required forms.
if (IsShiftedByte(Result) ||
(Result > 0 && Result < 0x10000 && (Result & 0xFF) == 0xFF))
return false;
return Diag(TheCall->getBeginLoc(),
diag::err_argument_not_shifted_byte_or_xxff)
<< Arg->getSourceRange();
}
/// SemaBuiltinARMMemoryTaggingCall - Handle calls of memory tagging extensions
bool Sema::SemaBuiltinARMMemoryTaggingCall(unsigned BuiltinID, CallExpr *TheCall) {
if (BuiltinID == AArch64::BI__builtin_arm_irg) {

@@ -4832,8 +4832,13 @@ static void handleXRayLogArgsAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
}
static bool ArmMveAliasValid(unsigned BuiltinID, StringRef AliasName) {
-// FIXME: this will be filled in by Tablegen which isn't written yet
-return false;
+if (AliasName.startswith("__arm_"))
+AliasName = AliasName.substr(6);
+switch (BuiltinID) {
+#include "clang/Basic/arm_mve_builtin_aliases.inc"
+default:
+return false;
+}
}
static void handleArmMveAliasAttr(Sema &S, Decl *D, const ParsedAttr &AL) {

@@ -7261,8 +7261,10 @@ static bool isPermittedNeonBaseType(QualType &Ty,
/// match one of the standard Neon vector types.
static void HandleNeonVectorTypeAttr(QualType &CurType, const ParsedAttr &Attr,
Sema &S, VectorType::VectorKind VecKind) {
-// Target must have NEON
-if (!S.Context.getTargetInfo().hasFeature("neon")) {
+// Target must have NEON (or MVE, whose vectors are similar enough
+// not to need a separate attribute)
+if (!S.Context.getTargetInfo().hasFeature("neon") &&
+!S.Context.getTargetInfo().hasFeature("mve")) {
S.Diag(Attr.getLoc(), diag::err_attribute_unsupported) << Attr;
Attr.setInvalid();
return;

@@ -0,0 +1,23 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp -mfloat-abi=hard -O0 -Xclang -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
#include <arm_mve.h>
// CHECK-LABEL: @test_urshrl(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[VALUE:%.*]], 32
// CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[VALUE]] to i32
// CHECK-NEXT: [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.urshrl(i32 [[TMP2]], i32 [[TMP1]], i32 6)
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
// CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 32
// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
// CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
// CHECK-NEXT: ret i64 [[TMP9]]
//
uint64_t test_urshrl(uint64_t value)
{
return urshrl(value, 6);
}

@@ -0,0 +1,89 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp -mfloat-abi=hard -O0 -Xclang -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp -mfloat-abi=hard -DPOLYMORPHIC -O0 -Xclang -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
#include <arm_mve.h>
// CHECK-LABEL: @test_vadciq_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0)
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 1
// CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 29
// CHECK-NEXT: [[TMP3:%.*]] = and i32 1, [[TMP2]]
// CHECK-NEXT: store i32 [[TMP3]], i32* [[CARRY_OUT:%.*]], align 4
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP4]]
//
int32x4_t test_vadciq_s32(int32x4_t a, int32x4_t b, unsigned *carry_out)
{
#ifdef POLYMORPHIC
return vadciq(a, b, carry_out);
#else /* POLYMORPHIC */
return vadciq_s32(a, b, carry_out);
#endif /* POLYMORPHIC */
}
// CHECK-LABEL: @test_vadcq_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CARRY:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 29
// CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 1
// CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 29
// CHECK-NEXT: [[TMP5:%.*]] = and i32 1, [[TMP4]]
// CHECK-NEXT: store i32 [[TMP5]], i32* [[CARRY]], align 4
// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP6]]
//
uint32x4_t test_vadcq_u32(uint32x4_t a, uint32x4_t b, unsigned *carry)
{
#ifdef POLYMORPHIC
return vadcq(a, b, carry);
#else /* POLYMORPHIC */
return vadcq_u32(a, b, carry);
#endif /* POLYMORPHIC */
}
// CHECK-LABEL: @test_vadciq_m_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 1
// CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 29
// CHECK-NEXT: [[TMP5:%.*]] = and i32 1, [[TMP4]]
// CHECK-NEXT: store i32 [[TMP5]], i32* [[CARRY_OUT:%.*]], align 4
// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP6]]
//
uint32x4_t test_vadciq_m_u32(uint32x4_t inactive, uint32x4_t a, uint32x4_t b, unsigned *carry_out, mve_pred16_t p)
{
#ifdef POLYMORPHIC
return vadciq_m(inactive, a, b, carry_out, p);
#else /* POLYMORPHIC */
return vadciq_m_u32(inactive, a, b, carry_out, p);
#endif /* POLYMORPHIC */
}
// CHECK-LABEL: @test_vadcq_m_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CARRY:%.*]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 29
// CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP3:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vadc.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 [[TMP1]], <4 x i1> [[TMP3]])
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP4]], 1
// CHECK-NEXT: [[TMP6:%.*]] = lshr i32 [[TMP5]], 29
// CHECK-NEXT: [[TMP7:%.*]] = and i32 1, [[TMP6]]
// CHECK-NEXT: store i32 [[TMP7]], i32* [[CARRY]], align 4
// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP4]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP8]]
//
int32x4_t test_vadcq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, unsigned *carry, mve_pred16_t p)
{
#ifdef POLYMORPHIC
return vadcq_m(inactive, a, b, carry, p);
#else /* POLYMORPHIC */
return vadcq_m_s32(inactive, a, b, carry, p);
#endif /* POLYMORPHIC */
}

@@ -0,0 +1,65 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp -mfloat-abi=hard -O0 -Xclang -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp -mfloat-abi=hard -DPOLYMORPHIC -O0 -Xclang -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
#include <arm_mve.h>
// CHECK-LABEL: @test_vaddq_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = add <4 x i32> [[A:%.*]], [[B:%.*]]
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
//
uint32x4_t test_vaddq_u32(uint32x4_t a, uint32x4_t b)
{
#ifdef POLYMORPHIC
return vaddq(a, b);
#else /* POLYMORPHIC */
return vaddq_u32(a, b);
#endif /* POLYMORPHIC */
}
// CHECK-LABEL: @test_vsubq_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = fsub <8 x half> [[A:%.*]], [[B:%.*]]
// CHECK-NEXT: ret <8 x half> [[TMP0]]
//
float16x8_t test_vsubq_f16(float16x8_t a, float16x8_t b)
{
#ifdef POLYMORPHIC
return vsubq(a, b);
#else /* POLYMORPHIC */
return vsubq_f16(a, b);
#endif /* POLYMORPHIC */
}
// CHECK-LABEL: @test_vaddq_m_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.add.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <16 x i8> [[TMP2]]
//
int8x16_t test_vaddq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
return vaddq_m(inactive, a, b, p);
#else /* POLYMORPHIC */
return vaddq_m_s8(inactive, a, b, p);
#endif /* POLYMORPHIC */
}
// CHECK-LABEL: @test_vsubq_m_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.sub.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]])
// CHECK-NEXT: ret <4 x float> [[TMP2]]
//
float32x4_t test_vsubq_m_f32(float32x4_t inactive, float32x4_t a, float32x4_t b, mve_pred16_t p)
{
#ifdef POLYMORPHIC
return vsubq_m(inactive, a, b, p);
#else /* POLYMORPHIC */
return vsubq_m_f32(inactive, a, b, p);
#endif /* POLYMORPHIC */
}

@@ -0,0 +1,26 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp -mfloat-abi=hard -O0 -Xclang -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
#include <arm_mve.h>
// CHECK-LABEL: @test_vcvttq_f16_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.narrow(<8 x half> [[A:%.*]], <4 x float> [[B:%.*]], i32 1)
// CHECK-NEXT: ret <8 x half> [[TMP0]]
//
float16x8_t test_vcvttq_f16_f32(float16x8_t a, float32x4_t b)
{
return vcvttq_f16_f32(a, b);
}
// CHECK-LABEL: @test_vcvttq_m_f16_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.narrow.predicated(<8 x half> [[A:%.*]], <4 x float> [[B:%.*]], i32 1, <4 x i1> [[TMP1]])
// CHECK-NEXT: ret <8 x half> [[TMP2]]
//
float16x8_t test_vcvttq_m_f16_f32(float16x8_t a, float32x4_t b, mve_pred16_t p)
{
return vcvttq_m_f16_f32(a, b, p);
}

@@ -0,0 +1,100 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp -mfloat-abi=hard -O0 -Xclang -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s
// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp -mfloat-abi=hard -DPOLYMORPHIC -O0 -Xclang -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s
#include <arm_mve.h>
// CHECK-LABEL: @test_vld2q_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call { <8 x half>, <8 x half> } @llvm.arm.mve.vld2q.v8f16.p0f16(half* [[ADDR:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <8 x half>, <8 x half> } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = insertvalue [[STRUCT_FLOAT16X8X2_T:%.*]] undef, <8 x half> [[TMP1]], 0, 0
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <8 x half>, <8 x half> } [[TMP0]], 1
// CHECK-NEXT: [[TMP4:%.*]] = insertvalue [[STRUCT_FLOAT16X8X2_T]] %2, <8 x half> [[TMP3]], 0, 1
// CHECK-NEXT: ret [[STRUCT_FLOAT16X8X2_T]] %4
//
float16x8x2_t test_vld2q_f16(const float16_t *addr)
{
#ifdef POLYMORPHIC
return vld2q(addr);
#else /* POLYMORPHIC */
return vld2q_f16(addr);
#endif /* POLYMORPHIC */
}
// CHECK-LABEL: @test_vld4q_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.mve.vld4q.v16i8.p0i8(i8* [[ADDR:%.*]])
// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP0]], 0
// CHECK-NEXT: [[TMP2:%.*]] = insertvalue [[STRUCT_UINT8X16X4_T:%.*]] undef, <16 x i8> [[TMP1]], 0, 0
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP0]], 1
// CHECK-NEXT: [[TMP4:%.*]] = insertvalue [[STRUCT_UINT8X16X4_T]] %2, <16 x i8> [[TMP3]], 0, 1
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP0]], 2
// CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT_UINT8X16X4_T]] %4, <16 x i8> [[TMP5]], 0, 2
// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP0]], 3
// CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT_UINT8X16X4_T]] %6, <16 x i8> [[TMP7]], 0, 3
// CHECK-NEXT: ret [[STRUCT_UINT8X16X4_T]] %8
//
uint8x16x4_t test_vld4q_u8(const uint8_t *addr)
{
#ifdef POLYMORPHIC
return vld4q(addr);
#else /* POLYMORPHIC */
return vld4q_u8(addr);
#endif /* POLYMORPHIC */
}
// CHECK-LABEL: @test_vst2q_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VALUE_COERCE_FCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_UINT32X4X2_T:%.*]] %value.coerce, 0, 0
// CHECK-NEXT: [[VALUE_COERCE_FCA_0_1_EXTRACT:%.*]] = extractvalue [[STRUCT_UINT32X4X2_T]] %value.coerce, 0, 1
// CHECK-NEXT: call void @llvm.arm.mve.vst2q.p0i32.v4i32(i32* [[ADDR:%.*]], <4 x i32> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <4 x i32> [[VALUE_COERCE_FCA_0_1_EXTRACT]], i32 0)
// CHECK-NEXT: call void @llvm.arm.mve.vst2q.p0i32.v4i32(i32* [[ADDR]], <4 x i32> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <4 x i32> [[VALUE_COERCE_FCA_0_1_EXTRACT]], i32 1)
// CHECK-NEXT: ret void
//
void test_vst2q_u32(uint32_t *addr, uint32x4x2_t value)
{
#ifdef POLYMORPHIC
vst2q(addr, value);
#else /* POLYMORPHIC */
vst2q_u32(addr, value);
#endif /* POLYMORPHIC */
}
// CHECK-LABEL: @test_vst4q_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VALUE_COERCE_FCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_INT8X16X4_T:%.*]] %value.coerce, 0, 0
// CHECK-NEXT: [[VALUE_COERCE_FCA_0_1_EXTRACT:%.*]] = extractvalue [[STRUCT_INT8X16X4_T]] %value.coerce, 0, 1
// CHECK-NEXT: [[VALUE_COERCE_FCA_0_2_EXTRACT:%.*]] = extractvalue [[STRUCT_INT8X16X4_T]] %value.coerce, 0, 2
// CHECK-NEXT: [[VALUE_COERCE_FCA_0_3_EXTRACT:%.*]] = extractvalue [[STRUCT_INT8X16X4_T]] %value.coerce, 0, 3
// CHECK-NEXT: call void @llvm.arm.mve.vst4q.p0i8.v16i8(i8* [[ADDR:%.*]], <16 x i8> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_1_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_2_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_3_EXTRACT]], i32 0)
// CHECK-NEXT: call void @llvm.arm.mve.vst4q.p0i8.v16i8(i8* [[ADDR]], <16 x i8> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_1_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_2_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_3_EXTRACT]], i32 1)
// CHECK-NEXT: call void @llvm.arm.mve.vst4q.p0i8.v16i8(i8* [[ADDR]], <16 x i8> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_1_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_2_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_3_EXTRACT]], i32 2)
// CHECK-NEXT: call void @llvm.arm.mve.vst4q.p0i8.v16i8(i8* [[ADDR]], <16 x i8> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_1_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_2_EXTRACT]], <16 x i8> [[VALUE_COERCE_FCA_0_3_EXTRACT]], i32 3)
// CHECK-NEXT: ret void
//
void test_vst4q_s8(int8_t *addr, int8x16x4_t value)
{
#ifdef POLYMORPHIC
vst4q(addr, value);
#else /* POLYMORPHIC */
vst4q_s8(addr, value);
#endif /* POLYMORPHIC */
}
// CHECK-LABEL: @test_vst2q_f16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VALUE_COERCE_FCA_0_0_EXTRACT:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T:%.*]] %value.coerce, 0, 0
// CHECK-NEXT: [[VALUE_COERCE_FCA_0_1_EXTRACT:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T]] %value.coerce, 0, 1
// CHECK-NEXT: call void @llvm.arm.mve.vst2q.p0f16.v8f16(half* [[ADDR:%.*]], <8 x half> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <8 x half> [[VALUE_COERCE_FCA_0_1_EXTRACT]], i32 0)
// CHECK-NEXT: call void @llvm.arm.mve.vst2q.p0f16.v8f16(half* [[ADDR]], <8 x half> [[VALUE_COERCE_FCA_0_0_EXTRACT]], <8 x half> [[VALUE_COERCE_FCA_0_1_EXTRACT]], i32 1)
// CHECK-NEXT: ret void
//
void test_vst2q_f16(float16_t *addr, float16x8x2_t value)
{
#ifdef POLYMORPHIC
vst2q(addr, value);
#else /* POLYMORPHIC */
vst2q_f16(addr, value);
#endif /* POLYMORPHIC */
}

@@ -0,0 +1,48 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp -mfloat-abi=hard -O0 -Xclang -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
#include <arm_mve.h>
// CHECK-LABEL: @test_vldrwq_gather_base_wb_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32> [[TMP0]], i32 80)
// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP1]], 1
// CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[ADDR]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP1]], 0
// CHECK-NEXT: ret <4 x i32> [[TMP3]]
//
int32x4_t test_vldrwq_gather_base_wb_s32(uint32x4_t *addr)
{
return vldrwq_gather_base_wb_s32(addr, 0x50);
}
// CHECK-LABEL: @test_vldrwq_gather_base_wb_f32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* [[ADDR:%.*]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4f32.v4i32(<4 x i32> [[TMP0]], i32 64)
// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x i32> } [[TMP1]], 1
// CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[ADDR]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x float>, <4 x i32> } [[TMP1]], 0
// CHECK-NEXT: ret <4 x float> [[TMP3]]
//
float32x4_t test_vldrwq_gather_base_wb_f32(uint32x4_t *addr)
{
return vldrwq_gather_base_wb_f32(addr, 0x40);
}
// CHECK-LABEL: @test_vldrdq_gather_base_wb_z_u64(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ADDR:%.*]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
// CHECK-NEXT: [[TMP3:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> [[TMP0]], i32 656, <4 x i1> [[TMP2]])
// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP3]], 1
// CHECK-NEXT: store <2 x i64> [[TMP4]], <2 x i64>* [[ADDR]], align 8
// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP3]], 0
// CHECK-NEXT: ret <2 x i64> [[TMP5]]
//
uint64x2_t test_vldrdq_gather_base_wb_z_u64(uint64x2_t *addr, mve_pred16_t p)
{
return vldrdq_gather_base_wb_z_u64(addr, 0x290, p);
}

@@ -0,0 +1,97 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp -mfloat-abi=hard -O0 -Xclang -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp -mfloat-abi=hard -DPOLYMORPHIC -O0 -Xclang -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
#include <arm_mve.h>
// CHECK-LABEL: @test_vminvq_s8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[A:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.minv.s.v16i8(i32 [[TMP0]], <16 x i8> [[B:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
// CHECK-NEXT: ret i8 [[TMP2]]
//
int8_t test_vminvq_s8(int8_t a, int8x16_t b)
{
#ifdef POLYMORPHIC
return vminvq(a, b);
#else /* POLYMORPHIC */
return vminvq_s8(a, b);
#endif /* POLYMORPHIC */
}
// CHECK-LABEL: @test_vminvq_s16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[A:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.minv.s.v8i16(i32 [[TMP0]], <8 x i16> [[B:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
// CHECK-NEXT: ret i16 [[TMP2]]
//
int16_t test_vminvq_s16(int16_t a, int16x8_t b)
{
#ifdef POLYMORPHIC
return vminvq(a, b);
#else /* POLYMORPHIC */
return vminvq_s16(a, b);
#endif /* POLYMORPHIC */
}
// CHECK-LABEL: @test_vminvq_s32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.mve.minv.s.v4i32(i32 [[A:%.*]], <4 x i32> [[B:%.*]])
// CHECK-NEXT: ret i32 [[TMP0]]
//
int32_t test_vminvq_s32(int32_t a, int32x4_t b)
{
#ifdef POLYMORPHIC
return vminvq(a, b);
#else /* POLYMORPHIC */
return vminvq_s32(a, b);
#endif /* POLYMORPHIC */
}
// CHECK-LABEL: @test_vminvq_u8(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[A:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.minv.u.v16i8(i32 [[TMP0]], <16 x i8> [[B:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
// CHECK-NEXT: ret i8 [[TMP2]]
//
uint8_t test_vminvq_u8(uint8_t a, uint8x16_t b)
{
#ifdef POLYMORPHIC
return vminvq(a, b);
#else /* POLYMORPHIC */
return vminvq_u8(a, b);
#endif /* POLYMORPHIC */
}
// CHECK-LABEL: @test_vminvq_u16(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[A:%.*]] to i32
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.minv.u.v8i16(i32 [[TMP0]], <8 x i16> [[B:%.*]])
// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
// CHECK-NEXT: ret i16 [[TMP2]]
//
uint16_t test_vminvq_u16(uint16_t a, uint16x8_t b)
{
#ifdef POLYMORPHIC
return vminvq(a, b);
#else /* POLYMORPHIC */
return vminvq_u16(a, b);
#endif /* POLYMORPHIC */
}
// CHECK-LABEL: @test_vminvq_u32(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.arm.mve.minv.u.v4i32(i32 [[A:%.*]], <4 x i32> [[B:%.*]])
// CHECK-NEXT: ret i32 [[TMP0]]
//
uint32_t test_vminvq_u32(uint32_t a, uint32x4_t b)
{
#ifdef POLYMORPHIC
return vminvq(a, b);
#else /* POLYMORPHIC */
return vminvq_u32(a, b);
#endif /* POLYMORPHIC */
}

@@ -14,6 +14,7 @@ add_tablegen(clang-tblgen CLANG
ClangSACheckersEmitter.cpp
ClangTypeNodesEmitter.cpp
NeonEmitter.cpp
MveEmitter.cpp
TableGen.cpp
)
set_target_properties(clang-tblgen PROPERTIES FOLDER "Clang tablegenning")

(File diff suppressed because it is too large.)

@@ -60,6 +60,11 @@ enum ActionType {
GenArmFP16,
GenArmNeonSema,
GenArmNeonTest,
GenArmMveHeader,
GenArmMveBuiltinDef,
GenArmMveBuiltinSema,
GenArmMveBuiltinCG,
GenArmMveBuiltinAliases,
GenAttrDocs,
GenDiagDocs,
GenOptDocs,
@@ -162,6 +167,16 @@ cl::opt<ActionType> Action(
"Generate ARM NEON sema support for clang"),
clEnumValN(GenArmNeonTest, "gen-arm-neon-test",
"Generate ARM NEON tests for clang"),
clEnumValN(GenArmMveHeader, "gen-arm-mve-header",
"Generate arm_mve.h for clang"),
clEnumValN(GenArmMveBuiltinDef, "gen-arm-mve-builtin-def",
"Generate ARM MVE builtin definitions for clang"),
clEnumValN(GenArmMveBuiltinSema, "gen-arm-mve-builtin-sema",
"Generate ARM MVE builtin sema checks for clang"),
clEnumValN(GenArmMveBuiltinCG, "gen-arm-mve-builtin-codegen",
"Generate ARM MVE builtin code-generator for clang"),
clEnumValN(GenArmMveBuiltinAliases, "gen-arm-mve-builtin-aliases",
"Generate list of valid ARM MVE builtin aliases for clang"),
clEnumValN(GenAttrDocs, "gen-attr-docs",
"Generate attribute documentation"),
clEnumValN(GenDiagDocs, "gen-diag-docs",
@@ -296,6 +311,21 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
case GenArmNeonTest:
EmitNeonTest(Records, OS);
break;
case GenArmMveHeader:
EmitMveHeader(Records, OS);
break;
case GenArmMveBuiltinDef:
EmitMveBuiltinDef(Records, OS);
break;
case GenArmMveBuiltinSema:
EmitMveBuiltinSema(Records, OS);
break;
case GenArmMveBuiltinCG:
EmitMveBuiltinCG(Records, OS);
break;
case GenArmMveBuiltinAliases:
EmitMveBuiltinAliases(Records, OS);
break;
case GenAttrDocs:
EmitClangAttrDocs(Records, OS);
break;

@@ -88,6 +88,12 @@ void EmitNeon2(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitNeonSema2(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitNeonTest2(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitMveHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitMveBuiltinDef(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitMveBuiltinSema(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitMveBuiltinCG(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitMveBuiltinAliases(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitClangAttrDocs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitClangDiagDocs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitClangOptDocs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);