Rewrite NRVO determination. Track NRVO candidates on the parser Scope and apply the NRVO candidate flag to all possible NRVO candidates here, and remove the flags in computeNRVO or upon template instantiation. A variable now has NRVO applied if and only if every return statement in that scope returns that variable. This is nearly optimal.

Performs NRVO roughly 7% more often in a bootstrap build of clang. Patch co-authored by Richard Smith.

llvm-svn: 207890
This commit is contained in:
Nick Lewycky 2014-05-03 00:41:18 +00:00
parent e39ee21551
commit d78f92fbb2
11 changed files with 186 additions and 73 deletions

View File

@ -15,6 +15,7 @@
#define LLVM_CLANG_SEMA_SCOPE_H
#include "clang/Basic/Diagnostic.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@ -28,6 +29,7 @@ namespace clang {
class Decl;
class UsingDirectiveDecl;
class VarDecl;
/// Scope - A scope is a transient data structure that is used while parsing the
/// program. It assists with resolving identifiers to the appropriate
@ -167,7 +169,11 @@ private:
/// \brief Used to determine if errors occurred in this scope.
DiagnosticErrorTrap ErrorTrap;
/// A lattice consisting of undefined, a single NRVO candidate variable in
/// this scope, or over-defined. The bit is true when over-defined.
llvm::PointerIntPair<VarDecl *, 1, bool> NRVO;
public:
Scope(Scope *Parent, unsigned ScopeFlags, DiagnosticsEngine &Diag)
: ErrorTrap(Diag) {
@ -373,6 +379,24 @@ public:
UsingDirectives.end());
}
/// Record \p VD as an NRVO candidate seen in this scope.
///
/// Advances the NRVO lattice: with no candidate recorded yet, \p VD becomes
/// the single candidate; if a different candidate is already recorded the
/// scope becomes over-defined; an over-defined scope is left unchanged.
void addNRVOCandidate(VarDecl *VD) {
  // Over-defined is terminal; nothing further can be learned.
  if (NRVO.getInt())
    return;

  VarDecl *Known = NRVO.getPointer();
  if (!Known)
    NRVO.setPointer(VD);
  else if (Known != VD)
    setNoNRVO();
}
/// Put the NRVO lattice into its over-defined state: the flag bit is set and
/// any recorded candidate is dropped.
void setNoNRVO() {
  NRVO.setPointerAndInt(nullptr, true);
}
/// Flags this scope's single NRVO candidate as an NRVO variable when that
/// candidate is declared in this scope, then, unless this scope has an
/// entity, propagates this scope's NRVO state to the parent scope.
void mergeNRVOIntoParent();
/// Init - This is used by the parser to implement scope caching.
///
void Init(Scope *parent, unsigned flags);

View File

@ -3056,10 +3056,14 @@ public:
RecordDecl *CreateCapturedStmtRecordDecl(CapturedDecl *&CD,
SourceLocation Loc,
unsigned NumParams);
const VarDecl *getCopyElisionCandidate(QualType ReturnType, Expr *E,
bool AllowFunctionParameters);
VarDecl *getCopyElisionCandidate(QualType ReturnType, Expr *E,
bool AllowFunctionParameters);
bool isCopyElisionCandidate(QualType ReturnType, const VarDecl *VD,
bool AllowFunctionParameters);
StmtResult ActOnReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp);
StmtResult ActOnReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp,
Scope *CurScope);
StmtResult BuildReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp);
StmtResult ActOnCapScopeReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp);
StmtResult ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,

View File

@ -1750,7 +1750,7 @@ StmtResult Parser::ParseReturnStatement() {
return StmtError();
}
}
return Actions.ActOnReturnStmt(ReturnLoc, R.take());
return Actions.ActOnReturnStmt(ReturnLoc, R.take(), getCurScope());
}
namespace {

View File

@ -365,8 +365,7 @@ void Parser::ExitScope() {
// Inform the actions module that this scope is going away if there are any
// decls in it.
if (!getCurScope()->decl_empty())
Actions.ActOnPopScope(Tok.getLocation(), getCurScope());
Actions.ActOnPopScope(Tok.getLocation(), getCurScope());
Scope *OldScope = getCurScope();
Actions.CurScope = OldScope->getParent();

View File

@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "clang/Sema/Scope.h"
#include "clang/AST/Decl.h"
#include "llvm/Support/raw_ostream.h"
using namespace clang;
@ -77,6 +78,7 @@ void Scope::Init(Scope *parent, unsigned flags) {
UsingDirectives.clear();
Entity = 0;
ErrorTrap.reset();
NRVO.setPointerAndInt(nullptr, 0);
}
bool Scope::containedInPrototypeScope() const {
@ -103,6 +105,21 @@ void Scope::AddFlags(unsigned FlagsToSet) {
Flags |= FlagsToSet;
}
/// Finalize NRVO information for this scope and hand it to the parent.
///
/// If this scope ended with exactly one candidate and that candidate is
/// declared here, it is marked as an NRVO variable. Scopes that have an
/// entity stop at that point; otherwise the parent scope receives either the
/// over-defined state or this scope's candidate.
void Scope::mergeNRVOIntoParent() {
  VarDecl *Candidate = NRVO.getPointer();
  if (Candidate && isDeclScope(Candidate))
    Candidate->setNRVOVariable(true);

  // A scope with an entity does not forward its NRVO state upward.
  if (getEntity())
    return;

  if (NRVO.getInt()) {
    getParent()->setNoNRVO();
    return;
  }
  if (Candidate)
    getParent()->addNRVOCandidate(Candidate);
}
/// Print this scope's state to the LLVM error stream.
void Scope::dump() const {
  dumpImpl(llvm::errs());
}
void Scope::dumpImpl(raw_ostream &OS) const {
@ -176,4 +193,9 @@ void Scope::dumpImpl(raw_ostream &OS) const {
OS << "MSLocalManglingNumber: " << getMSLocalManglingNumber() << '\n';
if (const DeclContext *DC = getEntity())
OS << "Entity : (clang::DeclContext*)" << DC << '\n';
if (NRVO.getInt())
OS << "NRVO not allowed";
else if (NRVO.getPointer())
OS << "NRVO candidate : (clang::VarDecl*)" << NRVO.getPointer() << '\n';
}

View File

@ -1373,6 +1373,8 @@ static void CheckPoppedLabel(LabelDecl *L, Sema &S) {
}
void Sema::ActOnPopScope(SourceLocation Loc, Scope *S) {
S->mergeNRVOIntoParent();
if (S->decl_empty()) return;
assert((S->getFlags() & (Scope::DeclScope | Scope::TemplateParamScope)) &&
"Scope shouldn't contain decls!");
@ -9797,28 +9799,17 @@ Decl *Sema::ActOnStartOfFunctionDef(Scope *FnBodyScope, Decl *D) {
/// use the named return value optimization.
///
/// This function applies a very simplistic algorithm for NRVO: if every return
/// statement in the function has the same NRVO candidate, that candidate is
/// the NRVO variable.
///
/// FIXME: Employ a smarter algorithm that accounts for multiple return
/// statements and the lifetimes of the NRVO candidates. We should be able to
/// find a maximal set of NRVO variables.
/// statement in the scope of a variable has the same NRVO candidate, that
/// candidate is an NRVO variable.
void Sema::computeNRVO(Stmt *Body, FunctionScopeInfo *Scope) {
ReturnStmt **Returns = Scope->Returns.data();
const VarDecl *NRVOCandidate = 0;
for (unsigned I = 0, E = Scope->Returns.size(); I != E; ++I) {
if (!Returns[I]->getNRVOCandidate())
return;
if (!NRVOCandidate)
NRVOCandidate = Returns[I]->getNRVOCandidate();
else if (NRVOCandidate != Returns[I]->getNRVOCandidate())
return;
if (const VarDecl *NRVOCandidate = Returns[I]->getNRVOCandidate()) {
if (!NRVOCandidate->isNRVOVariable())
Returns[I]->setNRVOCandidate(nullptr);
}
}
if (NRVOCandidate)
const_cast<VarDecl*>(NRVOCandidate)->setNRVOVariable(true);
}
bool Sema::canDelayFunctionBody(const Declarator &D) {

View File

@ -9623,7 +9623,7 @@ void Sema::DefineImplicitCopyAssignment(SourceLocation CurrentLocation,
// Add a "return *this;"
ExprResult ThisObj = CreateBuiltinUnaryOp(Loc, UO_Deref, This.build(*this, Loc));
StmtResult Return = ActOnReturnStmt(Loc, ThisObj.get());
StmtResult Return = BuildReturnStmt(Loc, ThisObj.get());
if (Return.isInvalid())
Invalid = true;
else {
@ -10041,7 +10041,7 @@ void Sema::DefineImplicitMoveAssignment(SourceLocation CurrentLocation,
// Add a "return *this;"
ExprResult ThisObj = CreateBuiltinUnaryOp(Loc, UO_Deref, This.build(*this, Loc));
StmtResult Return = ActOnReturnStmt(Loc, ThisObj.get());
StmtResult Return = BuildReturnStmt(Loc, ThisObj.get());
if (Return.isInvalid())
Invalid = true;
else {
@ -10447,7 +10447,7 @@ void Sema::DefineImplicitLambdaToFunctionPointerConversion(
Expr *FunctionRef = BuildDeclRefExpr(Invoker, Invoker->getType(),
VK_LValue, Conv->getLocation()).take();
assert(FunctionRef && "Can't refer to __invoke function?");
Stmt *Return = ActOnReturnStmt(Conv->getLocation(), FunctionRef).take();
Stmt *Return = BuildReturnStmt(Conv->getLocation(), FunctionRef).take();
Conv->setBody(new (Context) CompoundStmt(Context, Return,
Conv->getLocation(),
Conv->getLocation()));
@ -10505,7 +10505,7 @@ void Sema::DefineImplicitLambdaToBlockPointerConversion(
// Create the return statement that returns the block from the conversion
// function.
StmtResult Return = ActOnReturnStmt(Conv->getLocation(), BuildBlock.get());
StmtResult Return = BuildReturnStmt(Conv->getLocation(), BuildBlock.get());
if (Return.isInvalid()) {
Diag(CurrentLocation, diag::note_lambda_to_block_conv);
Conv->setInvalidDecl();

View File

@ -2443,52 +2443,62 @@ Sema::ActOnBreakStmt(SourceLocation BreakLoc, Scope *CurScope) {
///
/// \returns The NRVO candidate variable, if the return statement may use the
/// NRVO, or NULL if there is no such candidate.
const VarDecl *Sema::getCopyElisionCandidate(QualType ReturnType,
Expr *E,
bool AllowFunctionParameter) {
QualType ExprType = E->getType();
VarDecl *Sema::getCopyElisionCandidate(QualType ReturnType,
                                       Expr *E,
                                       bool AllowFunctionParameter) {
  // Copy elision is only considered when compiling as C++.
  if (!getLangOpts().CPlusPlus)
    return nullptr;

  // - in a return statement in a function [where] ...
  // ... the expression is the name of a non-volatile automatic object ...
  DeclRefExpr *Ref = dyn_cast<DeclRefExpr>(E->IgnoreParens());
  if (!Ref || Ref->refersToEnclosingLocal())
    return nullptr;

  // The named declaration must be a variable that also satisfies the
  // remaining conditions checked by isCopyElisionCandidate.
  VarDecl *Var = dyn_cast<VarDecl>(Ref->getDecl());
  const bool Eligible =
      Var && isCopyElisionCandidate(ReturnType, Var, AllowFunctionParameter);
  return Eligible ? Var : nullptr;
}
bool Sema::isCopyElisionCandidate(QualType ReturnType, const VarDecl *VD,
bool AllowFunctionParameter) {
QualType VDType = VD->getType();
// - in a return statement in a function with ...
// ... a class return type ...
if (!ReturnType.isNull()) {
if (!ReturnType.isNull() && !ReturnType->isDependentType()) {
if (!ReturnType->isRecordType())
return 0;
return false;
// ... the same cv-unqualified type as the function return type ...
if (!Context.hasSameUnqualifiedType(ReturnType, ExprType))
return 0;
if (!VDType->isDependentType() &&
!Context.hasSameUnqualifiedType(ReturnType, VDType))
return false;
}
// ... the expression is the name of a non-volatile automatic object
// (other than a function or catch-clause parameter)) ...
const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(E->IgnoreParens());
if (!DR || DR->refersToEnclosingLocal())
return 0;
const VarDecl *VD = dyn_cast<VarDecl>(DR->getDecl());
if (!VD)
return 0;
// ...object (other than a function or catch-clause parameter)...
if (VD->getKind() != Decl::Var &&
!(AllowFunctionParameter && VD->getKind() == Decl::ParmVar))
return 0;
if (VD->isExceptionVariable()) return 0;
return false;
if (VD->isExceptionVariable()) return false;
// ...automatic...
if (!VD->hasLocalStorage()) return 0;
if (!VD->hasLocalStorage()) return false;
// ...non-volatile...
if (VD->getType().isVolatileQualified()) return 0;
if (VD->getType()->isReferenceType()) return 0;
if (VD->getType().isVolatileQualified()) return false;
// __block variables can't be allocated in a way that permits NRVO.
if (VD->hasAttr<BlocksAttr>()) return 0;
if (VD->hasAttr<BlocksAttr>()) return false;
// Variables with higher required alignment than their type's ABI
// alignment cannot use NRVO.
if (VD->hasAttr<AlignedAttr>() &&
if (!VD->getType()->isDependentType() && VD->hasAttr<AlignedAttr>() &&
Context.getDeclAlign(VD) > Context.getTypeAlignInChars(VD->getType()))
return 0;
return false;
return VD;
return true;
}
/// \brief Perform the initialization of a potentially-movable value, which
@ -2694,6 +2704,8 @@ Sema::ActOnCapScopeReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) {
}
RetValExp = Res.take();
CheckReturnValExpr(RetValExp, FnRetType, ReturnLoc);
} else {
NRVOCandidate = getCopyElisionCandidate(FnRetType, RetValExp, false);
}
if (RetValExp) {
@ -2708,9 +2720,7 @@ Sema::ActOnCapScopeReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) {
// If we need to check for the named return value optimization,
// or if we need to infer the return type,
// save the return statement in our scope for later processing.
if (CurCap->HasImplicitReturnType ||
(getLangOpts().CPlusPlus && FnRetType->isRecordType() &&
!CurContext->isDependentContext()))
if (CurCap->HasImplicitReturnType || NRVOCandidate)
FunctionScopes.back()->Returns.push_back(Result);
return Owned(Result);
@ -2807,7 +2817,24 @@ bool Sema::DeduceFunctionTypeFromReturnExpr(FunctionDecl *FD,
}
StmtResult
Sema::ActOnReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) {
Sema::ActOnReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp,
                      Scope *CurScope) {
  // Build the statement first; an invalid result is returned untouched and
  // leaves the scope's NRVO state as it was.
  StmtResult R = BuildReturnStmt(ReturnLoc, RetValExp);
  if (R.isInvalid())
    return R;

  // Report to the current scope which variable (if any) this return names,
  // so the scope can track whether all of its returns agree.
  const VarDecl *Candidate = cast<ReturnStmt>(R.get())->getNRVOCandidate();
  if (Candidate)
    CurScope->addNRVOCandidate(const_cast<VarDecl *>(Candidate));
  else
    CurScope->setNoNRVO();
  return R;
}
StmtResult Sema::BuildReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) {
// Check for unexpanded parameter packs.
if (RetValExp && DiagnoseUnexpandedParameterPack(RetValExp))
return StmtError();
@ -2948,18 +2975,19 @@ Sema::ActOnReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) {
} else {
assert(RetValExp || HasDependentReturnType);
const VarDecl *NRVOCandidate = 0;
QualType RetType = RelatedRetType.isNull() ? FnRetType : RelatedRetType;
// C99 6.8.6.4p3(136): The return statement is not an assignment. The
// overlap restriction of subclause 6.5.16.1 does not apply to the case of
// function return.
// In C++ the return statement is handled via a copy initialization,
// the C version of which boils down to CheckSingleAssignmentConstraints.
if (RetValExp)
NRVOCandidate = getCopyElisionCandidate(FnRetType, RetValExp, false);
if (!HasDependentReturnType && !RetValExp->isTypeDependent()) {
// we have a non-void function with an expression, continue checking
QualType RetType = (RelatedRetType.isNull() ? FnRetType : RelatedRetType);
// C99 6.8.6.4p3(136): The return statement is not an assignment. The
// overlap restriction of subclause 6.5.16.1 does not apply to the case of
// function return.
// In C++ the return statement is handled via a copy initialization,
// the C version of which boils down to CheckSingleAssignmentConstraints.
NRVOCandidate = getCopyElisionCandidate(FnRetType, RetValExp, false);
InitializedEntity Entity = InitializedEntity::InitializeResult(ReturnLoc,
RetType,
NRVOCandidate != 0);
@ -3001,8 +3029,7 @@ Sema::ActOnReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) {
// If we need to check for the named return value optimization, save the
// return statement in our scope for later processing.
if (getLangOpts().CPlusPlus && FnRetType->isRecordType() &&
!CurContext->isDependentContext())
if (Result->getNRVOCandidate())
FunctionScopes.back()->Returns.push_back(Result);
return Owned(Result);

View File

@ -417,6 +417,13 @@ Decl *TemplateDeclInstantiator::VisitVarDecl(VarDecl *D,
SemaRef.BuildVariableInstantiation(Var, D, TemplateArgs, LateAttrs, Owner,
StartingScope, InstantiatingVarTemplate);
if (D->isNRVOVariable()) {
QualType ReturnType = cast<FunctionDecl>(DC)->getReturnType();
if (SemaRef.isCopyElisionCandidate(ReturnType, Var, false))
Var->setNRVOVariable(true);
}
return Var;
}

View File

@ -1187,7 +1187,7 @@ public:
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
StmtResult RebuildReturnStmt(SourceLocation ReturnLoc, Expr *Result) {
return getSema().ActOnReturnStmt(ReturnLoc, Result);
return getSema().BuildReturnStmt(ReturnLoc, Result);
}
/// \brief Build a new declaration statement.

View File

@ -9,6 +9,14 @@ public:
~X();
};
// Class template whose static member function returns a local variable by
// name, so the named-return path is also exercised inside a template.
template<typename T> struct Y {
Y();
static Y f() {
Y y;
return y;
}
};
// CHECK-LABEL: define void @_Z5test0v
// CHECK-EH-LABEL: define void @_Z5test0v
X test0() {
@ -108,12 +116,18 @@ X test2(bool B) {
}
// CHECK-LABEL: define void @_Z5test3b
X test3(bool B) {
// FIXME: We don't manage to apply NRVO here, although we could.
{
// FIXME: llvm should apply tail here.
// CHECK: call {{.*}} @_ZN1XC1Ev
// CHECK-NOT: call {{.*}} @_ZN1XC1ERKS_
// CHECK: call {{.*}} @_ZN1XC1Ev
// CHECK: call {{.*}} @_ZN1XC1ERKS_
if (B) {
X y;
return y;
}
// FIXME: we should NRVO this variable too.
X x;
return x;
}
@ -161,4 +175,29 @@ X test6() {
// CHECK-NEXT: ret void
}
// One return names the block-local variable x; the other returns a
// temporary, so the two return statements do not share a candidate.
X test7(bool b) {
if (b) {
X x;
return x;
}
return X();
}
// Each branch declares its own local and returns it by name; every return
// names a variable, but the two branches name different ones.
X test8(bool b) {
if (b) {
X x;
return x;
} else {
X y;
return y;
}
}
// Calls Y<int>::f() and discards the result, which forces instantiation of
// the member function.
// NOTE(review): there is no return statement in this non-void function;
// presumably intentional because only the instantiated f() is inspected --
// confirm before changing.
Y<int> test9() {
Y<int>::f();
}
// CHECK-LABEL: define linkonce_odr void @_ZN1YIiE1fEv
// CHECK: tail call {{.*}} @_ZN1YIiEC1Ev
// CHECK-EH: attributes [[NR_NUW]] = { noreturn nounwind }