ARM: Simplify PCS handling.
The backend should now be able to handle all AAPCS rules based on argument type, which means Clang no longer has to duplicate the register-counting logic and the CodeGen can be significantly simplified.

llvm-svn: 230349
This commit is contained in:
parent
e95c5b3236
commit
bc784d1caa
|
@ -4351,17 +4351,10 @@ public:
|
|||
|
||||
private:
|
||||
ABIKind Kind;
|
||||
mutable int VFPRegs[16];
|
||||
const unsigned NumVFPs;
|
||||
const unsigned NumGPRs;
|
||||
mutable unsigned AllocatedGPRs;
|
||||
mutable unsigned AllocatedVFPs;
|
||||
|
||||
public:
|
||||
ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind) : ABIInfo(CGT), Kind(_Kind),
|
||||
NumVFPs(16), NumGPRs(4) {
|
||||
ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind) : ABIInfo(CGT), Kind(_Kind) {
|
||||
setCCs();
|
||||
resetAllocatedRegs();
|
||||
}
|
||||
|
||||
bool isEABI() const {
|
||||
|
@ -4391,8 +4384,7 @@ public:
|
|||
|
||||
private:
|
||||
ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic) const;
|
||||
ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic,
|
||||
bool &IsCPRC) const;
|
||||
ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic) const;
|
||||
bool isIllegalVectorType(QualType Ty) const;
|
||||
|
||||
bool isHomogeneousAggregateBaseType(QualType Ty) const override;
|
||||
|
@ -4407,10 +4399,6 @@ private:
|
|||
llvm::CallingConv::ID getLLVMDefaultCC() const;
|
||||
llvm::CallingConv::ID getABIDefaultCC() const;
|
||||
void setCCs();
|
||||
|
||||
void markAllocatedGPRs(unsigned Alignment, unsigned NumRequired) const;
|
||||
void markAllocatedVFPs(unsigned Alignment, unsigned NumRequired) const;
|
||||
void resetAllocatedRegs(void) const;
|
||||
};
|
||||
|
||||
class ARMTargetCodeGenInfo : public TargetCodeGenInfo {
|
||||
|
@ -4521,52 +4509,11 @@ void WindowsARMTargetCodeGenInfo::SetTargetAttributes(
|
|||
}
|
||||
|
||||
void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const {
|
||||
// To correctly handle Homogeneous Aggregate, we need to keep track of the
|
||||
// VFP registers allocated so far.
|
||||
// C.1.vfp If the argument is a VFP CPRC and there are sufficient consecutive
|
||||
// VFP registers of the appropriate type unallocated then the argument is
|
||||
// allocated to the lowest-numbered sequence of such registers.
|
||||
// C.2.vfp If the argument is a VFP CPRC then any VFP registers that are
|
||||
// unallocated are marked as unavailable.
|
||||
resetAllocatedRegs();
|
||||
|
||||
if (getCXXABI().classifyReturnType(FI)) {
|
||||
if (FI.getReturnInfo().isIndirect())
|
||||
markAllocatedGPRs(1, 1);
|
||||
} else {
|
||||
if (!getCXXABI().classifyReturnType(FI))
|
||||
FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), FI.isVariadic());
|
||||
}
|
||||
for (auto &I : FI.arguments()) {
|
||||
unsigned PreAllocationVFPs = AllocatedVFPs;
|
||||
unsigned PreAllocationGPRs = AllocatedGPRs;
|
||||
bool IsCPRC = false;
|
||||
// 6.1.2.3 There is one VFP co-processor register class using registers
|
||||
// s0-s15 (d0-d7) for passing arguments.
|
||||
I.info = classifyArgumentType(I.type, FI.isVariadic(), IsCPRC);
|
||||
|
||||
// If we have allocated some arguments onto the stack (due to running
|
||||
// out of VFP registers), we cannot split an argument between GPRs and
|
||||
// the stack. If this situation occurs, we add padding to prevent the
|
||||
// GPRs from being used. In this situation, the current argument could
|
||||
// only be allocated by rule C.8, so rule C.6 would mark these GPRs as
|
||||
// unusable anyway.
|
||||
// We do not have to do this if the argument is being passed ByVal, as the
|
||||
// backend can handle that situation correctly.
|
||||
const bool StackUsed = PreAllocationGPRs > NumGPRs || PreAllocationVFPs > NumVFPs;
|
||||
const bool IsByVal = I.info.isIndirect() && I.info.getIndirectByVal();
|
||||
if (!IsCPRC && PreAllocationGPRs < NumGPRs && AllocatedGPRs > NumGPRs &&
|
||||
StackUsed && !IsByVal) {
|
||||
llvm::Type *PaddingTy = llvm::ArrayType::get(
|
||||
llvm::Type::getInt32Ty(getVMContext()), NumGPRs - PreAllocationGPRs);
|
||||
if (I.info.canHaveCoerceToType()) {
|
||||
I.info = ABIArgInfo::getDirect(I.info.getCoerceToType() /* type */,
|
||||
0 /* offset */, PaddingTy, true);
|
||||
} else {
|
||||
I.info = ABIArgInfo::getDirect(nullptr /* type */, 0 /* offset */,
|
||||
PaddingTy, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (auto &I : FI.arguments())
|
||||
I.info = classifyArgumentType(I.type, FI.isVariadic());
|
||||
|
||||
// Always honor user-specified calling convention.
|
||||
if (FI.getCallingConvention() != llvm::CallingConv::C)
|
||||
|
@ -4574,7 +4521,7 @@ void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const {
|
|||
|
||||
llvm::CallingConv::ID cc = getRuntimeCC();
|
||||
if (cc != llvm::CallingConv::C)
|
||||
FI.setEffectiveCallingConvention(cc);
|
||||
FI.setEffectiveCallingConvention(cc);
|
||||
}
|
||||
|
||||
/// Return the default calling convention that LLVM will use.
|
||||
|
@ -4612,64 +4559,8 @@ void ARMABIInfo::setCCs() {
|
|||
llvm::CallingConv::ARM_APCS : llvm::CallingConv::ARM_AAPCS);
|
||||
}
|
||||
|
||||
/// markAllocatedVFPs - update VFPRegs according to the alignment and
|
||||
/// number of VFP registers (unit is S register) requested.
///
/// \param Alignment   step between candidate starting registers; the search
///                    only considers indices 0, Alignment, 2*Alignment, ...
/// \param NumRequired number of consecutive VFPRegs entries to claim.
///
/// On success the claimed entries are set to 1 and AllocatedVFPs grows by
/// NumRequired. If no free run exists, every entry is marked used and
/// AllocatedVFPs is set to 17.
/// NOTE(review): Alignment must be nonzero, otherwise the search loop below
/// never advances.
|
||||
void ARMABIInfo::markAllocatedVFPs(unsigned Alignment,
|
||||
unsigned NumRequired) const {
|
||||
// Early Exit.
|
||||
if (AllocatedVFPs >= 16) {
|
||||
// We use AllocatedVFPs > 16 to signal that some CPRCs were allocated on
|
||||
// the stack.
|
||||
AllocatedVFPs = 17;
|
||||
return;
|
||||
}
|
||||
// C.1.vfp If the argument is a VFP CPRC and there are sufficient consecutive
|
||||
// VFP registers of the appropriate type unallocated then the argument is
|
||||
// allocated to the lowest-numbered sequence of such registers.
|
||||
for (unsigned I = 0; I < 16; I += Alignment) {
|
||||
bool FoundSlot = true;
|
||||
// Reject this start if the run would pass index 15 or touches an entry
|
||||
// that is already marked as used.
|
||||
for (unsigned J = I, JEnd = I + NumRequired; J < JEnd; J++)
|
||||
if (J >= 16 || VFPRegs[J]) {
|
||||
FoundSlot = false;
|
||||
break;
|
||||
}
|
||||
if (FoundSlot) {
|
||||
// Claim the whole run and account for it in the running total.
|
||||
for (unsigned J = I, JEnd = I + NumRequired; J < JEnd; J++)
|
||||
VFPRegs[J] = 1;
|
||||
AllocatedVFPs += NumRequired;
|
||||
return;
|
||||
}
|
||||
}
|
||||
// C.2.vfp If the argument is a VFP CPRC then any VFP registers that are
|
||||
// unallocated are marked as unavailable.
|
||||
for (unsigned I = 0; I < 16; I++)
|
||||
VFPRegs[I] = 1;
|
||||
AllocatedVFPs = 17; // We do not have enough VFP registers.
|
||||
}
|
||||
|
||||
/// Update AllocatedGPRs to record the number of general purpose registers
|
||||
/// which have been allocated. It is valid for AllocatedGPRs to go above 4,
|
||||
/// this represents arguments being stored on the stack.
///
/// \param Alignment   required alignment counted in registers; the assert
///                    below accepts only 1 or 2 (described by its message as
///                    4 or 8 bytes).
/// \param NumRequired number of registers to add to the running count.
|
||||
void ARMABIInfo::markAllocatedGPRs(unsigned Alignment,
|
||||
unsigned NumRequired) const {
|
||||
assert((Alignment == 1 || Alignment == 2) && "Alignment must be 4 or 8 bytes");
|
||||
|
||||
// For a two-register alignment, round an odd running count up to the next
// even value before adding the request.
if (Alignment == 2 && AllocatedGPRs & 0x1)
|
||||
AllocatedGPRs += 1;
|
||||
|
||||
AllocatedGPRs += NumRequired;
|
||||
}
|
||||
|
||||
/// Reset the register-allocation bookkeeping: zero the AllocatedGPRs and
/// AllocatedVFPs counters and clear the first NumVFPs entries of VFPRegs.
void ARMABIInfo::resetAllocatedRegs(void) const {
|
||||
AllocatedGPRs = 0;
|
||||
AllocatedVFPs = 0;
|
||||
// Mark every tracked VFP register slot as free again.
for (unsigned i = 0; i < NumVFPs; ++i)
|
||||
VFPRegs[i] = 0;
|
||||
}
|
||||
|
||||
ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
|
||||
bool &IsCPRC) const {
|
||||
// We update number of allocated VFPs according to
|
||||
ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty,
|
||||
bool isVariadic) const {
|
||||
// 6.1.2.1 The following argument types are VFP CPRCs:
|
||||
// A single-precision floating-point type (including promoted
|
||||
// half-precision types); A double-precision floating-point type;
|
||||
|
@ -4687,58 +4578,20 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
|
|||
if (Size <= 32) {
|
||||
llvm::Type *ResType =
|
||||
llvm::Type::getInt32Ty(getVMContext());
|
||||
markAllocatedGPRs(1, 1);
|
||||
return ABIArgInfo::getDirect(ResType);
|
||||
}
|
||||
if (Size == 64) {
|
||||
llvm::Type *ResType = llvm::VectorType::get(
|
||||
llvm::Type::getInt32Ty(getVMContext()), 2);
|
||||
if (getABIKind() == ARMABIInfo::AAPCS || isVariadic){
|
||||
markAllocatedGPRs(2, 2);
|
||||
} else {
|
||||
markAllocatedVFPs(2, 2);
|
||||
IsCPRC = true;
|
||||
}
|
||||
return ABIArgInfo::getDirect(ResType);
|
||||
}
|
||||
if (Size == 128) {
|
||||
llvm::Type *ResType = llvm::VectorType::get(
|
||||
llvm::Type::getInt32Ty(getVMContext()), 4);
|
||||
if (getABIKind() == ARMABIInfo::AAPCS || isVariadic) {
|
||||
markAllocatedGPRs(2, 4);
|
||||
} else {
|
||||
markAllocatedVFPs(4, 4);
|
||||
IsCPRC = true;
|
||||
}
|
||||
return ABIArgInfo::getDirect(ResType);
|
||||
}
|
||||
markAllocatedGPRs(1, 1);
|
||||
return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
|
||||
}
|
||||
// Update VFPRegs for legal vector types.
|
||||
if (getABIKind() == ARMABIInfo::AAPCS_VFP && !isVariadic) {
|
||||
if (const VectorType *VT = Ty->getAs<VectorType>()) {
|
||||
uint64_t Size = getContext().getTypeSize(VT);
|
||||
// Size of a legal vector should be power of 2 and above 64.
|
||||
markAllocatedVFPs(Size >= 128 ? 4 : 2, Size / 32);
|
||||
IsCPRC = true;
|
||||
}
|
||||
}
|
||||
// Update VFPRegs for floating point types.
|
||||
if (getABIKind() == ARMABIInfo::AAPCS_VFP && !isVariadic) {
|
||||
if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
|
||||
if (BT->getKind() == BuiltinType::Half ||
|
||||
BT->getKind() == BuiltinType::Float) {
|
||||
markAllocatedVFPs(1, 1);
|
||||
IsCPRC = true;
|
||||
}
|
||||
if (BT->getKind() == BuiltinType::Double ||
|
||||
BT->getKind() == BuiltinType::LongDouble) {
|
||||
markAllocatedVFPs(2, 2);
|
||||
IsCPRC = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!isAggregateTypeForABI(Ty)) {
|
||||
// Treat an enum type as its underlying type.
|
||||
|
@ -4746,15 +4599,11 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
|
|||
Ty = EnumTy->getDecl()->getIntegerType();
|
||||
}
|
||||
|
||||
unsigned Size = getContext().getTypeSize(Ty);
|
||||
if (!IsCPRC)
|
||||
markAllocatedGPRs(Size > 32 ? 2 : 1, (Size + 31) / 32);
|
||||
return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend()
|
||||
: ABIArgInfo::getDirect());
|
||||
}
|
||||
|
||||
if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
|
||||
markAllocatedGPRs(1, 1);
|
||||
return ABIArgInfo::getIndirect(0, RAA == CGCXXABI::RAA_DirectInMemory);
|
||||
}
|
||||
|
||||
|
@ -4770,19 +4619,6 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
|
|||
if (isHomogeneousAggregate(Ty, Base, Members)) {
|
||||
assert(Base && "Base class should be set for homogeneous aggregate");
|
||||
// Base can be a floating-point or a vector.
|
||||
if (Base->isVectorType()) {
|
||||
// ElementSize is in number of floats.
|
||||
unsigned ElementSize = getContext().getTypeSize(Base) == 64 ? 2 : 4;
|
||||
markAllocatedVFPs(ElementSize,
|
||||
Members * ElementSize);
|
||||
} else if (Base->isSpecificBuiltinType(BuiltinType::Float))
|
||||
markAllocatedVFPs(1, Members);
|
||||
else {
|
||||
assert(Base->isSpecificBuiltinType(BuiltinType::Double) ||
|
||||
Base->isSpecificBuiltinType(BuiltinType::LongDouble));
|
||||
markAllocatedVFPs(2, Members * 2);
|
||||
}
|
||||
IsCPRC = true;
|
||||
return ABIArgInfo::getDirect(nullptr, 0, nullptr, false);
|
||||
}
|
||||
}
|
||||
|
@ -4801,7 +4637,6 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
|
|||
// argument is greater than 64 bytes, this will always use up any available
|
||||
// registers (of which there are 4). We also don't care about getting the
|
||||
// alignment right, because general-purpose registers cannot be back-filled.
|
||||
markAllocatedGPRs(1, 4);
|
||||
return ABIArgInfo::getIndirect(TyAlign, /*ByVal=*/true,
|
||||
/*Realign=*/TyAlign > ABIAlign);
|
||||
}
|
||||
|
@ -4814,11 +4649,9 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
|
|||
if (getContext().getTypeAlign(Ty) <= 32) {
|
||||
ElemTy = llvm::Type::getInt32Ty(getVMContext());
|
||||
SizeRegs = (getContext().getTypeSize(Ty) + 31) / 32;
|
||||
markAllocatedGPRs(1, SizeRegs);
|
||||
} else {
|
||||
ElemTy = llvm::Type::getInt64Ty(getVMContext());
|
||||
SizeRegs = (getContext().getTypeSize(Ty) + 63) / 64;
|
||||
markAllocatedGPRs(2, SizeRegs * 2);
|
||||
}
|
||||
|
||||
return ABIArgInfo::getDirect(llvm::ArrayType::get(ElemTy, SizeRegs));
|
||||
|
@ -4918,7 +4751,6 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy,
|
|||
|
||||
// Large vector types should be returned via memory.
|
||||
if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128) {
|
||||
markAllocatedGPRs(1, 1);
|
||||
return ABIArgInfo::getIndirect(0);
|
||||
}
|
||||
|
||||
|
@ -4956,7 +4788,6 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy,
|
|||
}
|
||||
|
||||
// Otherwise return in memory.
|
||||
markAllocatedGPRs(1, 1);
|
||||
return ABIArgInfo::getIndirect(0);
|
||||
}
|
||||
|
||||
|
@ -4992,7 +4823,6 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy,
|
|||
return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
|
||||
}
|
||||
|
||||
markAllocatedGPRs(1, 1);
|
||||
return ABIArgInfo::getIndirect(0);
|
||||
}
|
||||
|
||||
|
|
|
@ -126,19 +126,19 @@ typedef struct { long long x; int y; } struct_long_long_int;
|
|||
// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_1(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, i64 %k, i32 %l)
|
||||
void test_vfp_stack_gpr_split_1(double a, double b, double c, double d, double e, double f, double g, double h, double i, int j, long long k, int l) {}
|
||||
|
||||
// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_2(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, [3 x i32], [2 x i64] %k.coerce)
|
||||
// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_2(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, [2 x i64] %k.coerce)
|
||||
void test_vfp_stack_gpr_split_2(double a, double b, double c, double d, double e, double f, double g, double h, double i, int j, struct_long_long_int k) {}
|
||||
|
||||
// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_3(%struct.struct_long_long_int* noalias sret %agg.result, double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, [3 x i32], [2 x i64] %k.coerce)
|
||||
// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_3(%struct.struct_long_long_int* noalias sret %agg.result, double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, [2 x i64] %k.coerce)
|
||||
struct_long_long_int test_vfp_stack_gpr_split_3(double a, double b, double c, double d, double e, double f, double g, double h, double i, struct_long_long_int k) {}
|
||||
|
||||
typedef struct { int a; int b:4; int c; } struct_int_bitfield_int;
|
||||
// CHECK: define arm_aapcs_vfpcc void @test_test_vfp_stack_gpr_split_bitfield(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, i32 %k, [2 x i32], [3 x i32] %l.coerce)
|
||||
// CHECK: define arm_aapcs_vfpcc void @test_test_vfp_stack_gpr_split_bitfield(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, i32 %k, [3 x i32] %l.coerce)
|
||||
void test_test_vfp_stack_gpr_split_bitfield(double a, double b, double c, double d, double e, double f, double g, double h, double i, int j, int k, struct_int_bitfield_int l) {}
|
||||
|
||||
// Note: this struct requires internal padding
|
||||
typedef struct { int x; long long y; } struct_int_long_long;
|
||||
// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_4(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, [3 x i32], [2 x i64] %k.coerce)
|
||||
// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_4(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, [2 x i64] %k.coerce)
|
||||
void test_vfp_stack_gpr_split_4(double a, double b, double c, double d, double e, double f, double g, double h, double i, int j, struct_int_long_long k) {}
|
||||
|
||||
// This very large struct (passed byval) uses up the GPRs, so no padding is needed
|
||||
|
|
Loading…
Reference in New Issue