diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 9719dfa43289..eb517edd816f 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -240,7 +240,8 @@ const CGFunctionInfo &CodeGenTypes::getFunctionInfo(CanQualType ResTy, return *FI; // Construct the function info. - FI = new CGFunctionInfo(CC, Info.getNoReturn(), Info.getRegParm(), ResTy, ArgTys); + FI = new CGFunctionInfo(CC, Info.getNoReturn(), Info.getRegParm(), ResTy, + ArgTys); FunctionInfos.InsertNode(FI, InsertPos); // Compute ABI information. @@ -259,6 +260,8 @@ CGFunctionInfo::CGFunctionInfo(unsigned _CallingConvention, NoReturn(_NoReturn), RegParm(_RegParm) { NumArgs = ArgTys.size(); + + // FIXME: Coallocate with the CGFunctionInfo object. Args = new ArgInfo[1 + NumArgs]; Args[0].type = ResTy; for (unsigned i = 0; i < NumArgs; ++i) @@ -593,9 +596,19 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI, bool IsVariadic) { case ABIArgInfo::Ignore: break; - case ABIArgInfo::Coerce: - ArgTys.push_back(AI.getCoerceToType()); + case ABIArgInfo::Coerce: { + // If the coerce-to type is a first class aggregate, flatten it. Either + // way is semantically identical, but fast-isel and the optimizer + // generally likes scalar values better than FCAs. + const llvm::Type *ArgTy = AI.getCoerceToType(); + if (const llvm::StructType *STy = dyn_cast(ArgTy)) { + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) + ArgTys.push_back(STy->getElementType(i)); + } else { + ArgTys.push_back(ArgTy); + } break; + } case ABIArgInfo::Indirect: { // indirect arguments are always on the stack, which is addr space #0. @@ -713,7 +726,12 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI, switch (AI.getKind()) { case ABIArgInfo::Coerce: - break; + if (const llvm::StructType *STy = + dyn_cast(AI.getCoerceToType())) + Index += STy->getNumElements(); + else + ++Index; + continue; // Skip index increment. case ABIArgInfo::Indirect: if (AI.getIndirectByVal()) @@ -806,7 +824,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, switch (ArgI.getKind()) { case ABIArgInfo::Indirect: { - llvm::Value* V = AI; + llvm::Value *V = AI; if (hasAggregateLLVMType(Ty)) { // Do nothing, aggregates and complex variables are accessed by // reference. @@ -826,7 +844,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, case ABIArgInfo::Extend: case ABIArgInfo::Direct: { assert(AI != Fn->arg_end() && "Argument mismatch!"); - llvm::Value* V = AI; + llvm::Value *V = AI; if (hasAggregateLLVMType(Ty)) { // Create a temporary alloca to hold the argument; the rest of // codegen expects to access aggregates & complex values by @@ -876,12 +894,29 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, continue; case ABIArgInfo::Coerce: { - assert(AI != Fn->arg_end() && "Argument mismatch!"); + // If the coerce-to type is a first class aggregate, we flatten it and + // pass the elements. Either way is semantically identical, but fast-isel + // and the optimizer generally likes scalar values better than FCAs. + llvm::Value *FormalArg; + if (const llvm::StructType *STy = + dyn_cast(ArgI.getCoerceToType())) { + // Reconstruct the FCA here. + // FIXME: If we have a direct match, do nice gep/store series. + FormalArg = llvm::UndefValue::get(STy); + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + assert(AI != Fn->arg_end() && "Argument mismatch!"); + FormalArg = Builder.CreateInsertValue(FormalArg, AI++, i); + } + } else { + assert(AI != Fn->arg_end() && "Argument mismatch!"); + FormalArg = AI++; + } + // FIXME: This is very wasteful; EmitParmDecl is just going to drop the // result in a new alloca anyway, so we could just store into that // directly if we broke the abstraction down more. llvm::Value *V = CreateMemTemp(Ty, "coerce"); - CreateCoercedStore(AI, V, /*DestIsVolatile=*/false, *this); + CreateCoercedStore(FormalArg, V, /*DestIsVolatile=*/false, *this); // Match to what EmitParmDecl is expecting for this type. if (!CodeGenFunction::hasAggregateLLVMType(Ty)) { V = EmitLoadOfScalar(V, false, Ty); @@ -892,7 +927,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI, } } EmitParmDecl(*Arg, V); - break; + continue; // Skip ++AI increment, already done. } } @@ -1080,8 +1115,22 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, StoreComplexToAddr(RV.getComplexVal(), SrcPtr, false); } else SrcPtr = RV.getAggregateAddr(); - Args.push_back(CreateCoercedLoad(SrcPtr, ArgInfo.getCoerceToType(), - *this)); + + llvm::Value *SrcVal = + CreateCoercedLoad(SrcPtr, ArgInfo.getCoerceToType(), *this); + + // If the coerce-to type is a first class aggregate, we flatten it and + // pass the elements. Either way is semantically identical, but fast-isel + // and the optimizer generally likes scalar values better than FCAs. + if (const llvm::StructType *STy = + dyn_cast(SrcVal->getType())) { + // Extract the elements of the value to pass in. + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) + Args.push_back(Builder.CreateExtractValue(SrcVal, i)); + } else { + Args.push_back(SrcVal); + } + break; } diff --git a/clang/test/CodeGen/x86_64-arguments.c b/clang/test/CodeGen/x86_64-arguments.c index 242239010881..2a96f22e1dc9 100644 --- a/clang/test/CodeGen/x86_64-arguments.c +++ b/clang/test/CodeGen/x86_64-arguments.c @@ -45,7 +45,7 @@ void f7(e7 a0) { // Test merging/passing of upper eightbyte with X87 class. // // CHECK: define %0 @f8_1() -// CHECK: define void @f8_2(%0) +// CHECK: define void @f8_2(i64, double) union u8 { long double a; int b; diff --git a/clang/test/CodeGenCXX/x86_64-arguments.cpp b/clang/test/CodeGenCXX/x86_64-arguments.cpp index 4bc83b85134e..bc2f312aea14 100644 --- a/clang/test/CodeGenCXX/x86_64-arguments.cpp +++ b/clang/test/CodeGenCXX/x86_64-arguments.cpp @@ -6,19 +6,19 @@ // Basic base class test. struct f0_s0 { unsigned a; }; struct f0_s1 : public f0_s0 { void *b; }; -// CHECK: define void @_Z2f05f0_s1([[i64_i64_ty]]) +// CHECK: define void @_Z2f05f0_s1(i64, i64) void f0(f0_s1 a0) { } // Check with two eight-bytes in base class. struct f1_s0 { unsigned a; unsigned b; float c; }; struct f1_s1 : public f1_s0 { float d;}; -// CHECK: define void @_Z2f15f1_s1([[i64_double_ty]]) +// CHECK: define void @_Z2f15f1_s1(i64, double) void f1(f1_s1 a0) { } // Check with two eight-bytes in base class and merge. struct f2_s0 { unsigned a; unsigned b; float c; }; struct f2_s1 : public f2_s0 { char d;}; -// CHECK: define void @_Z2f25f2_s1([[i64_i64_ty]]) +// CHECK: define void @_Z2f25f2_s1(i64, i64) void f2(f2_s1 a0) { } // PR5831 @@ -27,7 +27,7 @@ struct s3_1 { struct s3_0 a; long b; }; void f3(struct s3_1 x) {} // CHECK: define i64 @_Z4f4_0M2s4i(i64) -// CHECK: define [[i64_i64_ty]] @_Z4f4_1M2s4FivE([[i64_i64_ty]]) +// CHECK: define [[i64_i64_ty]] @_Z4f4_1M2s4FivE(i64, i64) struct s4 {}; typedef int s4::* s4_mdp; typedef int (s4::*s4_mfp)();