From 2ba5d64a80d0debaec396dbd512e59de9cc82e48 Mon Sep 17 00:00:00 2001 From: Cullen Rhodes Date: Tue, 27 Aug 2019 12:57:09 +0000 Subject: [PATCH] [IntrinsicEmitter] Support scalable vectors in intrinsics Summary: This patch adds support for scalable vectors in intrinsics, enabling intrinsics such as the following to be defined: declare <vscale x 4 x i32> @llvm.something.nxv4i32(<vscale x 4 x i32>) Support for this is implemented by defining a new type descriptor for scalable vectors and adding mangling support for scalable vector types in the name mangling scheme used by 'any' types in intrinsic signatures. Tests have been added for IRBuilder to test that scalable vectors work as expected when using intrinsics through this interface. This required implementing an intrinsic that is explicitly defined with scalable vectors, e.g. LLVMType<nxv4i32>; an SVE floating-point convert intrinsic was used for this. The behaviour of the overloaded type LLVMScalarOrSameVectorWidth with scalable vectors is tested using the existing masked load intrinsic. Also added an .ll test to test the Verifier catches a bad intrinsic argument when passing a fixed-width predicate (mask) to the masked.load intrinsic where a scalable predicate is expected. 
Patch by Paul Walker Reviewed By: sdesmalen Differential Revision: https://reviews.llvm.org/D65930 llvm-svn: 370053 --- llvm/include/llvm/IR/Intrinsics.h | 2 +- llvm/include/llvm/IR/IntrinsicsAArch64.td | 28 ++++++++++++ llvm/include/llvm/Support/ScalableSize.h | 3 ++ llvm/lib/IR/Function.cpp | 35 ++++++++++++--- llvm/test/Verifier/intrinsic-bad-arg-type.ll | 10 +++++ llvm/unittests/IR/IRBuilderTest.cpp | 45 ++++++++++++++++++++ llvm/utils/TableGen/IntrinsicEmitter.cpp | 5 ++- 7 files changed, 119 insertions(+), 9 deletions(-) create mode 100644 llvm/test/Verifier/intrinsic-bad-arg-type.ll diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h index f38f92022d21..46e68a7cfcc0 100644 --- a/llvm/include/llvm/IR/Intrinsics.h +++ b/llvm/include/llvm/IR/Intrinsics.h @@ -100,7 +100,7 @@ namespace Intrinsic { Integer, Vector, Pointer, Struct, Argument, ExtendArgument, TruncArgument, HalfVecArgument, SameVecWidthArgument, PtrToArgument, PtrToElt, VecOfAnyPtrsToElt, - VecElementArgument + VecElementArgument, ScalableVecArgument } Kind; union { diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index feb986c3e336..dc2ead3aeba2 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -748,3 +748,31 @@ def int_aarch64_ttest : GCCBuiltin<"__builtin_arm_ttest">, Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrHasSideEffects]>; } + +//===----------------------------------------------------------------------===// +// SVE + +def llvm_nxv2i1_ty : LLVMType; +def llvm_nxv4i1_ty : LLVMType; +def llvm_nxv8i1_ty : LLVMType; +def llvm_nxv16i1_ty : LLVMType; +def llvm_nxv16i8_ty : LLVMType; +def llvm_nxv4i32_ty : LLVMType; +def llvm_nxv2i64_ty : LLVMType; +def llvm_nxv8f16_ty : LLVMType; +def llvm_nxv4f32_ty : LLVMType; +def llvm_nxv2f64_ty : LLVMType; + +let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". 
+ // This class of intrinsics is not intended to be useful within LLVM IR but + // are instead here to support some of the more rigid parts of the ACLE. + class Builtin_SVCVT + : GCCBuiltin<"__builtin_sve_" # name>, + Intrinsic<[OUT], [OUT, llvm_nxv16i1_ty, IN], [IntrNoMem]>; +} + +// +// Floating-point conversions +// + +def int_aarch64_sve_fcvtzs_i32f16 : Builtin_SVCVT<"svcvt_s32_f16_m", llvm_nxv4i32_ty, llvm_nxv8f16_ty>; diff --git a/llvm/include/llvm/Support/ScalableSize.h b/llvm/include/llvm/Support/ScalableSize.h index 96bf043773a0..a057d5ea7ce5 100644 --- a/llvm/include/llvm/Support/ScalableSize.h +++ b/llvm/include/llvm/Support/ScalableSize.h @@ -36,6 +36,9 @@ public: bool operator==(const ElementCount& RHS) const { return Min == RHS.Min && Scalable == RHS.Scalable; } + bool operator!=(const ElementCount& RHS) const { + return !(*this == RHS); + } }; } // end namespace llvm diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index 462458d70652..34aad7ddd036 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -611,9 +611,11 @@ static std::string getMangledTypeStr(Type* Ty) { Result += "vararg"; // Ensure nested function types are distinguishable. 
Result += "f"; - } else if (isa(Ty)) { - Result += "v" + utostr(Ty->getVectorNumElements()) + - getMangledTypeStr(Ty->getVectorElementType()); + } else if (VectorType* VTy = dyn_cast(Ty)) { + if (VTy->isScalable()) + Result += "nx"; + Result += "v" + utostr(VTy->getVectorNumElements()) + + getMangledTypeStr(VTy->getVectorElementType()); } else if (Ty) { switch (Ty->getTypeID()) { default: llvm_unreachable("Unhandled type"); @@ -700,7 +702,8 @@ enum IIT_Info { IIT_STRUCT7 = 39, IIT_STRUCT8 = 40, IIT_F128 = 41, - IIT_VEC_ELEMENT = 42 + IIT_VEC_ELEMENT = 42, + IIT_SCALABLE_VEC = 43 }; static void DecodeIITType(unsigned &NextElt, ArrayRef Infos, @@ -871,6 +874,12 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef Infos, ArgInfo)); return; } + case IIT_SCALABLE_VEC: { + OutputTable.push_back(IITDescriptor::get(IITDescriptor::ScalableVecArgument, + 0)); + DecodeIITType(NextElt, Infos, OutputTable); + return; + } } llvm_unreachable("unhandled"); } @@ -968,7 +977,7 @@ static Type *DecodeFixedType(ArrayRef &Infos, Type *EltTy = DecodeFixedType(Infos, Tys, Context); Type *Ty = Tys[D.getArgumentNumber()]; if (auto *VTy = dyn_cast(Ty)) - return VectorType::get(EltTy, VTy->getNumElements()); + return VectorType::get(EltTy, VTy->getElementCount()); return EltTy; } case IITDescriptor::PtrToArgument: { @@ -992,6 +1001,11 @@ static Type *DecodeFixedType(ArrayRef &Infos, case IITDescriptor::VecOfAnyPtrsToElt: // Return the overloaded type (which determines the pointers address space) return Tys[D.getOverloadArgNumber()]; + case IITDescriptor::ScalableVecArgument: { + Type *Ty = DecodeFixedType(Infos, Tys, Context); + return VectorType::get(Ty->getVectorElementType(), + { Ty->getVectorNumElements(), true }); + } } llvm_unreachable("unhandled"); } @@ -1191,8 +1205,8 @@ static bool matchIntrinsicType( return true; Type *EltTy = Ty; if (ThisArgType) { - if (ReferenceType->getVectorNumElements() != - ThisArgType->getVectorNumElements()) + if (ReferenceType->getElementCount() != + 
ThisArgType->getElementCount()) return true; EltTy = ThisArgType->getVectorElementType(); } @@ -1255,6 +1269,13 @@ static bool matchIntrinsicType( auto *ReferenceType = dyn_cast(ArgTys[D.getArgumentNumber()]); return !ReferenceType || Ty != ReferenceType->getElementType(); } + case IITDescriptor::ScalableVecArgument: { + VectorType *VTy = dyn_cast(Ty); + if (!VTy || !VTy->isScalable()) + return true; + return matchIntrinsicType(VTy, Infos, ArgTys, DeferredChecks, + IsDeferredCheck); + } } llvm_unreachable("unhandled"); } diff --git a/llvm/test/Verifier/intrinsic-bad-arg-type.ll b/llvm/test/Verifier/intrinsic-bad-arg-type.ll new file mode 100644 index 000000000000..1a6739e67ccc --- /dev/null +++ b/llvm/test/Verifier/intrinsic-bad-arg-type.ll @@ -0,0 +1,10 @@ +; RUN: not opt -S -verify 2>&1 < %s | FileCheck %s + +; CHECK: Intrinsic has incorrect argument type! +; CHECK-NEXT: (*, i32, <4 x i1>, )* @llvm.masked.load.nxv4i32.p0nxv4i32 + +define @masked_load(* %addr, <4 x i1> %mask, %dst) { + %res = call @llvm.masked.load.nxv4i32.p0nxv4i32(* %addr, i32 4, <4 x i1> %mask, %dst) + ret %res +} +declare @llvm.masked.load.nxv4i32.p0nxv4i32(*, i32, <4 x i1>, ) diff --git a/llvm/unittests/IR/IRBuilderTest.cpp b/llvm/unittests/IR/IRBuilderTest.cpp index f7a932e29190..538c2a0dd938 100644 --- a/llvm/unittests/IR/IRBuilderTest.cpp +++ b/llvm/unittests/IR/IRBuilderTest.cpp @@ -122,6 +122,51 @@ TEST_F(IRBuilderTest, Intrinsics) { EXPECT_FALSE(II->hasNoNaNs()); } +TEST_F(IRBuilderTest, IntrinsicsWithScalableVectors) { + IRBuilder<> Builder(BB); + CallInst *Call; + FunctionType *FTy; + + // Test scalable flag isn't dropped for intrinsic that is explicitly defined + // with scalable vectors, e.g. LLVMType. 
+ Type *SrcVecTy = VectorType::get(Builder.getHalfTy(), 8, true); + Type *DstVecTy = VectorType::get(Builder.getInt32Ty(), 4, true); + Type *PredTy = VectorType::get(Builder.getInt1Ty(), 16, true); + + SmallVector ArgTys; + ArgTys.push_back(UndefValue::get(DstVecTy)); + ArgTys.push_back(UndefValue::get(PredTy)); + ArgTys.push_back(UndefValue::get(SrcVecTy)); + + Call = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_fcvtzs_i32f16, {}, + ArgTys, nullptr, "aarch64.sve.fcvtzs.i32f16"); + FTy = Call->getFunctionType(); + EXPECT_EQ(FTy->getReturnType(), DstVecTy); + for (unsigned i = 0; i != ArgTys.size(); ++i) + EXPECT_EQ(FTy->getParamType(i), ArgTys[i]->getType()); + + // Test scalable flag isn't dropped for intrinsic defined with + // LLVMScalarOrSameVectorWidth. + + Type *VecTy = VectorType::get(Builder.getInt32Ty(), 4, true); + Type *PtrToVecTy = VecTy->getPointerTo(); + PredTy = VectorType::get(Builder.getInt1Ty(), 4, true); + + ArgTys.clear(); + ArgTys.push_back(UndefValue::get(PtrToVecTy)); + ArgTys.push_back(UndefValue::get(Builder.getInt32Ty())); + ArgTys.push_back(UndefValue::get(PredTy)); + ArgTys.push_back(UndefValue::get(VecTy)); + + Call = Builder.CreateIntrinsic(Intrinsic::masked_load, + {VecTy, PtrToVecTy}, ArgTys, + nullptr, "masked.load"); + FTy = Call->getFunctionType(); + EXPECT_EQ(FTy->getReturnType(), VecTy); + for (unsigned i = 0; i != ArgTys.size(); ++i) + EXPECT_EQ(FTy->getParamType(i), ArgTys[i]->getType()); +} + TEST_F(IRBuilderTest, ConstrainedFP) { IRBuilder<> Builder(BB); Value *V; diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp index 255d78e08211..3779dc717846 100644 --- a/llvm/utils/TableGen/IntrinsicEmitter.cpp +++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp @@ -220,7 +220,8 @@ enum IIT_Info { IIT_STRUCT7 = 39, IIT_STRUCT8 = 40, IIT_F128 = 41, - IIT_VEC_ELEMENT = 42 + IIT_VEC_ELEMENT = 42, + IIT_SCALABLE_VEC = 43 }; static void EncodeFixedValueType(MVT::SimpleValueType VT, @@ -339,6 +340,8 
@@ static void EncodeFixedType(Record *R, std::vector &ArgCodes, if (MVT(VT).isVector()) { MVT VVT = VT; + if (VVT.isScalableVector()) + Sig.push_back(IIT_SCALABLE_VEC); switch (VVT.getVectorNumElements()) { default: PrintFatalError("unhandled vector type width in intrinsic!"); case 1: Sig.push_back(IIT_V1); break;