diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp index d8768bee2cdf..98d9e364d9cc 100644 --- a/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/clang/lib/CodeGen/CGDeclCXX.cpp @@ -18,6 +18,7 @@ #include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/Support/Path.h" using namespace clang; @@ -259,6 +260,43 @@ void CodeGenFunction::EmitCXXGuardedInit(const VarDecl &D, CGM.getCXXABI().EmitGuardedInit(*this, D, DeclPtr, PerformInit); } +void CodeGenFunction::EmitCXXGuardedInitBranch(llvm::Value *NeedsInit, + llvm::BasicBlock *InitBlock, + llvm::BasicBlock *NoInitBlock, + GuardKind Kind, + const VarDecl *D) { + assert((Kind == GuardKind::TlsGuard || D) && "no guarded variable"); + + // A guess at how many times we will enter the initialization of a + // variable, depending on the kind of variable. + static const uint64_t InitsPerTLSVar = 1024; + static const uint64_t InitsPerLocalVar = 1024 * 1024; + + llvm::MDNode *Weights; + if (Kind == GuardKind::VariableGuard && !D->isLocalVarDecl()) { + // For non-local variables, don't apply any weighting for now. Due to our + // use of COMDATs, we expect there to be at most one initialization of the + // variable per DSO, but we have no way to know how many DSOs will try to + // initialize the variable. + Weights = nullptr; + } else { + uint64_t NumInits; + // FIXME: For the TLS case, collect and use profiling information to + // determine a more accurate branch weight. + if (Kind == GuardKind::TlsGuard || D->getTLSKind()) + NumInits = InitsPerTLSVar; + else + NumInits = InitsPerLocalVar; + + // The probability of us entering the initializer is + // 1 / (total number of times we attempt to initialize the variable). 
+ llvm::MDBuilder MDHelper(CGM.getLLVMContext()); + Weights = MDHelper.createBranchWeights(1, NumInits - 1); + } + + Builder.CreateCondBr(NeedsInit, InitBlock, NoInitBlock, Weights); +} + llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction( llvm::FunctionType *FTy, const Twine &Name, const CGFunctionInfo &FI, SourceLocation Loc, bool TLS) { @@ -539,7 +577,8 @@ CodeGenFunction::GenerateCXXGlobalInitFunc(llvm::Function *Fn, "guard.uninitialized"); llvm::BasicBlock *InitBlock = createBasicBlock("init"); ExitBlock = createBasicBlock("exit"); - Builder.CreateCondBr(Uninit, InitBlock, ExitBlock); + EmitCXXGuardedInitBranch(Uninit, InitBlock, ExitBlock, + GuardKind::TlsGuard, nullptr); EmitBlock(InitBlock); // Mark as initialized before initializing anything else. If the // initializers use previously-initialized thread_local vars, that's diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 753dd92f3071..2e31be8c6865 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3496,6 +3496,14 @@ public: void EmitCXXGuardedInit(const VarDecl &D, llvm::GlobalVariable *DeclPtr, bool PerformInit); + enum class GuardKind { VariableGuard, TlsGuard }; + + /// Emit a branch to select whether or not to perform guarded initialization. + void EmitCXXGuardedInitBranch(llvm::Value *NeedsInit, + llvm::BasicBlock *InitBlock, + llvm::BasicBlock *NoInitBlock, + GuardKind Kind, const VarDecl *D); + /// GenerateCXXGlobalInitFunc - Generates code for initializing global /// variables. void GenerateCXXGlobalInitFunc(llvm::Function *Fn, diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index d1f47b6d1a43..de9fd042a9e4 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -2113,13 +2113,14 @@ void ItaniumCXXABI::EmitGuardedInit(CodeGenFunction &CGF, (UseARMGuardVarABI && !useInt8GuardVariable) ? 
Builder.CreateAnd(LI, llvm::ConstantInt::get(CGM.Int8Ty, 1)) : LI; - llvm::Value *isInitialized = Builder.CreateIsNull(V, "guard.uninitialized"); + llvm::Value *NeedsInit = Builder.CreateIsNull(V, "guard.uninitialized"); llvm::BasicBlock *InitCheckBlock = CGF.createBasicBlock("init.check"); llvm::BasicBlock *EndBlock = CGF.createBasicBlock("init.end"); // Check if the first byte of the guard variable is zero. - Builder.CreateCondBr(isInitialized, InitCheckBlock, EndBlock); + CGF.EmitCXXGuardedInitBranch(NeedsInit, InitCheckBlock, EndBlock, + CodeGenFunction::GuardKind::VariableGuard, &D); CGF.EmitBlock(InitCheckBlock); diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp index 78b510bb4665..409bad72ee5a 100644 --- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -2463,11 +2463,12 @@ void MicrosoftCXXABI::EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D, // Test our bit from the guard variable. llvm::ConstantInt *Bit = llvm::ConstantInt::get(GuardTy, 1ULL << GuardNum); llvm::LoadInst *LI = Builder.CreateLoad(GuardAddr); - llvm::Value *IsInitialized = - Builder.CreateICmpNE(Builder.CreateAnd(LI, Bit), Zero); + llvm::Value *NeedsInit = + Builder.CreateICmpEQ(Builder.CreateAnd(LI, Bit), Zero); llvm::BasicBlock *InitBlock = CGF.createBasicBlock("init"); llvm::BasicBlock *EndBlock = CGF.createBasicBlock("init.end"); - Builder.CreateCondBr(IsInitialized, EndBlock, InitBlock); + CGF.EmitCXXGuardedInitBranch(NeedsInit, InitBlock, EndBlock, + CodeGenFunction::GuardKind::VariableGuard, &D); // Set our bit in the guard variable and emit the initializer and add a global // destructor if appropriate. 
@@ -2502,7 +2503,8 @@ void MicrosoftCXXABI::EmitGuardedInit(CodeGenFunction &CGF, const VarDecl &D, Builder.CreateICmpSGT(FirstGuardLoad, InitThreadEpoch); llvm::BasicBlock *AttemptInitBlock = CGF.createBasicBlock("init.attempt"); llvm::BasicBlock *EndBlock = CGF.createBasicBlock("init.end"); - Builder.CreateCondBr(IsUninitialized, AttemptInitBlock, EndBlock); + CGF.EmitCXXGuardedInitBranch(IsUninitialized, AttemptInitBlock, EndBlock, + CodeGenFunction::GuardKind::VariableGuard, &D); // This BasicBlock attempts to determine whether or not this thread is // responsible for doing the initialization. diff --git a/clang/test/CodeGenCXX/microsoft-abi-static-initializers.cpp b/clang/test/CodeGenCXX/microsoft-abi-static-initializers.cpp index 57a72d4e2a6c..0b84f07e1154 100644 --- a/clang/test/CodeGenCXX/microsoft-abi-static-initializers.cpp +++ b/clang/test/CodeGenCXX/microsoft-abi-static-initializers.cpp @@ -146,7 +146,7 @@ inline S &getS() { // CHECK-LABEL: define linkonce_odr dereferenceable({{[0-9]+}}) %struct.S* @"\01?getS@@YAAAUS@@XZ"() {{.*}} comdat // CHECK: load i32, i32* @"\01??_B?1??getS@@YAAAUS@@XZ@51" // CHECK: and i32 {{.*}}, 1 -// CHECK: icmp ne i32 {{.*}}, 0 +// CHECK: icmp eq i32 {{.*}}, 0 // CHECK: br i1 // init: // CHECK: or i32 {{.*}}, 1 diff --git a/clang/test/CodeGenCXX/microsoft-abi-thread-safe-statics.cpp b/clang/test/CodeGenCXX/microsoft-abi-thread-safe-statics.cpp index 0202586c8a62..3f53e631c964 100644 --- a/clang/test/CodeGenCXX/microsoft-abi-thread-safe-statics.cpp +++ b/clang/test/CodeGenCXX/microsoft-abi-thread-safe-statics.cpp @@ -24,8 +24,8 @@ extern inline S &f() { static thread_local S s; // CHECK: %[[guard:.*]] = load i32, i32* @"\01??__J?1??f@@YAAAUS@@XZ@51" // CHECK-NEXT: %[[mask:.*]] = and i32 %[[guard]], 1 -// CHECK-NEXT: %[[cmp:.*]] = icmp ne i32 %[[mask]], 0 -// CHECK-NEXT: br i1 %[[cmp]], label %[[init_end:.*]], label %[[init:.*]] +// CHECK-NEXT: %[[cmp:.*]] = icmp eq i32 %[[mask]], 0 +// CHECK-NEXT: br i1 %[[cmp]], label 
%[[init:.*]], label %[[init_end:.*]], !prof ![[unlikely_threadlocal:.*]] // // CHECK: [[init]]: // CHECK-NEXT: %[[or:.*]] = or i32 %[[guard]], 1 @@ -56,7 +56,7 @@ extern inline S &g() { // CHECK: %[[guard:.*]] = load atomic i32, i32* @"\01?$TSS0@?1??g@@YAAAUS@@XZ@4HA" unordered, align 4 // CHECK-NEXT: %[[epoch:.*]] = load i32, i32* @_Init_thread_epoch // CHECK-NEXT: %[[cmp:.*]] = icmp sgt i32 %[[guard]], %[[epoch]] -// CHECK-NEXT: br i1 %[[cmp]], label %[[init_attempt:.*]], label %[[init_end:.*]] +// CHECK-NEXT: br i1 %[[cmp]], label %[[init_attempt:.*]], label %[[init_end:.*]], !prof ![[unlikely_staticlocal:.*]] // // CHECK: [[init_attempt]]: // CHECK-NEXT: call void @_Init_thread_header(i32* @"\01?$TSS0@?1??g@@YAAAUS@@XZ@4HA") @@ -95,3 +95,6 @@ int g1() { static int i = f1(); return i; } + +// CHECK-DAG: ![[unlikely_threadlocal]] = !{!"branch_weights", i32 1, i32 1023} +// CHECK-DAG: ![[unlikely_staticlocal]] = !{!"branch_weights", i32 1, i32 1048575} diff --git a/clang/test/CodeGenCXX/static-init-wasm.cpp b/clang/test/CodeGenCXX/static-init-wasm.cpp index 289c3ea6024d..5f2f94fd85f3 100644 --- a/clang/test/CodeGenCXX/static-init-wasm.cpp +++ b/clang/test/CodeGenCXX/static-init-wasm.cpp @@ -20,7 +20,7 @@ void g() { // WEBASSEMBLY32: %[[R0:.+]] = load atomic i8, i8* bitcast (i32* @_ZGVZ1gvE1a to i8*) acquire, align 4 // WEBASSEMBLY32-NEXT: %[[R1:.+]] = and i8 %[[R0]], 1 // WEBASSEMBLY32-NEXT: %[[R2:.+]] = icmp eq i8 %[[R1]], 0 -// WEBASSEMBLY32-NEXT: br i1 %[[R2]], label %[[CHECK:.+]], label %[[END:.+]] +// WEBASSEMBLY32-NEXT: br i1 %[[R2]], label %[[CHECK:.+]], label %[[END:.+]], // WEBASSEMBLY32: [[CHECK]] // WEBASSEMBLY32: call i32 @__cxa_guard_acquire // WEBASSEMBLY32: [[END]] @@ -30,7 +30,7 @@ void g() { // WEBASSEMBLY64: %[[R0:.+]] = load atomic i8, i8* bitcast (i64* @_ZGVZ1gvE1a to i8*) acquire, align 8 // WEBASSEMBLY64-NEXT: %[[R1:.+]] = and i8 %[[R0]], 1 // WEBASSEMBLY64-NEXT: %[[R2:.+]] = icmp eq i8 %[[R1]], 0 -// WEBASSEMBLY64-NEXT: br i1 %[[R2]], label 
%[[CHECK:.+]], label %[[END:.+]] +// WEBASSEMBLY64-NEXT: br i1 %[[R2]], label %[[CHECK:.+]], label %[[END:.+]], // WEBASSEMBLY64: [[CHECK]] // WEBASSEMBLY64: call i32 @__cxa_guard_acquire // WEBASSEMBLY64: [[END]] diff --git a/clang/test/CodeGenCXX/static-initializer-branch-weights.cpp b/clang/test/CodeGenCXX/static-initializer-branch-weights.cpp new file mode 100644 index 000000000000..f9e77812714c --- /dev/null +++ b/clang/test/CodeGenCXX/static-initializer-branch-weights.cpp @@ -0,0 +1,126 @@ +// RUN: %clang_cc1 -emit-llvm -std=c++1z %s -o - -triple=x86_64-linux-gnu | FileCheck %s + +struct S { S(); ~S(); }; + +// CHECK-LABEL: define {{.*}}global_var_init +// CHECK-NOT: br +// CHECK: call void @_ZN1SC1Ev({{.*}}* @global) +S global; + +// CHECK-LABEL: define {{.*}}global_var_init +// FIXME: Do we really need thread-safe initialization here? We don't run +// global ctors on multiple threads. (If we were to do so, we'd need thread-safe +// init for B::member and B::inline_member too.) +// CHECK: load atomic i8, i8* bitcast (i64* @_ZGV13inline_global to i8*) acquire, +// CHECK: icmp eq i8 {{.*}}, 0 +// CHECK: br i1 +// CHECK-NOT: !prof +// CHECK: call void @_ZN1SC1Ev({{.*}}* @inline_global) +inline S inline_global; + +// CHECK-LABEL: define {{.*}}global_var_init +// CHECK-NOT: br +// CHECK: call void @_ZN1SC1Ev({{.*}}* @thread_local_global) +thread_local S thread_local_global; + +// CHECK-LABEL: define {{.*}}global_var_init +// CHECK: load i8, i8* bitcast (i64* @_ZGV26thread_local_inline_global to i8*) +// CHECK: icmp eq i8 {{.*}}, 0 +// CHECK: br i1 +// CHECK-NOT: !prof +// CHECK: call void @_ZN1SC1Ev({{.*}}* @thread_local_inline_global) +thread_local inline S thread_local_inline_global; + +struct A { + static S member; + static thread_local S thread_local_member; + + // CHECK-LABEL: define {{.*}}global_var_init + // CHECK: load atomic i8, i8* bitcast (i64* @_ZGVN1A13inline_memberE to i8*) acquire, + // CHECK: icmp eq i8 {{.*}}, 0 + // CHECK: br i1 + // CHECK-NOT: 
!prof + // CHECK: call void @_ZN1SC1Ev({{.*}}* @_ZN1A13inline_memberE) + static inline S inline_member; + + // CHECK-LABEL: define {{.*}}global_var_init + // CHECK: load i8, i8* bitcast (i64* @_ZGVN1A26thread_local_inline_memberE to i8*) + // CHECK: icmp eq i8 {{.*}}, 0 + // CHECK: br i1 + // CHECK-NOT: !prof + // CHECK: call void @_ZN1SC1Ev({{.*}}* @_ZN1A26thread_local_inline_memberE) + static thread_local inline S thread_local_inline_member; +}; + +// CHECK-LABEL: define void @_Z1fv() +void f() { + // CHECK: load atomic i8, i8* bitcast (i64* @_ZGVZ1fvE12static_local to i8*) acquire, + // CHECK: icmp eq i8 {{.*}}, 0 + // CHECK: br i1 {{.*}}, !prof ![[WEIGHTS_LOCAL:[0-9]*]] + static S static_local; + + // CHECK: load i8, i8* @_ZGVZ1fvE19static_thread_local, + // CHECK: icmp eq i8 {{.*}}, 0 + // CHECK: br i1 {{.*}}, !prof ![[WEIGHTS_THREAD_LOCAL:[0-9]*]] + static thread_local S static_thread_local; +} + +// CHECK-LABEL: define {{.*}}global_var_init +// CHECK-NOT: br +// CHECK: call void @_ZN1SC1Ev({{.*}}* @_ZN1A6memberE) +S A::member; + +// CHECK-LABEL: define {{.*}}global_var_init +// CHECK-NOT: br +// CHECK: call void @_ZN1SC1Ev({{.*}}* @_ZN1A19thread_local_memberE) +thread_local S A::thread_local_member; + +template<typename T> struct B { + // CHECK-LABEL: define {{.*}}global_var_init + // CHECK: load i8, i8* bitcast (i64* @_ZGVN1BIiE6memberE to i8*) + // CHECK: icmp eq i8 {{.*}}, 0 + // CHECK: br i1 + // CHECK-NOT: !prof + // CHECK: call void @_ZN1SC1Ev({{.*}}* @_ZN1BIiE6memberE) + static S member; + + // CHECK-LABEL: define {{.*}}global_var_init + // CHECK: load i8, i8* bitcast (i64* @_ZGVN1BIiE13inline_memberE to i8*) + // CHECK: icmp eq i8 {{.*}}, 0 + // CHECK: br i1 + // CHECK-NOT: !prof + // CHECK: call void @_ZN1SC1Ev({{.*}}* @_ZN1BIiE13inline_memberE) + static inline S inline_member; + + // CHECK-LABEL: define {{.*}}global_var_init + // CHECK: load i8, i8* bitcast (i64* @_ZGVN1BIiE19thread_local_memberE to i8*) + // CHECK: icmp eq i8 {{.*}}, 0 + // CHECK: br i1 + // 
CHECK-NOT: !prof + // CHECK: call void @_ZN1SC1Ev({{.*}}* @_ZN1BIiE19thread_local_memberE) + static thread_local S thread_local_member; + + // CHECK-LABEL: define {{.*}}global_var_init + // CHECK: load i8, i8* bitcast (i64* @_ZGVN1BIiE26thread_local_inline_memberE to i8*) + // CHECK: icmp eq i8 {{.*}}, 0 + // CHECK: br i1 + // CHECK-NOT: !prof + // CHECK: call void @_ZN1SC1Ev({{.*}}* @_ZN1BIiE26thread_local_inline_memberE) + static thread_local inline S thread_local_inline_member; +}; +template<typename T> S B<T>::member; +template<typename T> thread_local S B<T>::thread_local_member; + +template<typename ...T> void use(T &...); +void use_b() { + use(B<int>::member, B<int>::inline_member, B<int>::thread_local_member, + B<int>::thread_local_inline_member); +} + +// CHECK-LABEL: define {{.*}}tls_init() +// CHECK: load i8, i8* @__tls_guard, align 1 +// CHECK: icmp eq i8 {{.*}}, 0 +// CHECK: br i1 {{.*}}, !prof ![[WEIGHTS_THREAD_LOCAL]] + +// CHECK-DAG: ![[WEIGHTS_THREAD_LOCAL]] = !{!"branch_weights", i32 1, i32 1023} +// CHECK-DAG: ![[WEIGHTS_LOCAL]] = !{!"branch_weights", i32 1, i32 1048575}