[ThinLTO] Enable ThinLTO WholeProgramDevirt and LowerTypeTests in new PM

Summary: Enable these passes for CFI and WPD in ThinLTO and LTO with the new pass manager. Add a couple of tests for both PMs based on the clang tests tools/clang/test/CodeGen/thinlto-distributed-cfi*.ll, but just test through llvm-lto2 and not with distributed ThinLTO. Reviewers: pcc Subscribers: mehdi_amini, inglorion, eraman, steven_wu, dexonsmith, llvm-commits Differential Revision: https://reviews.llvm.org/D49429 llvm-svn: 337461
2018-07-19 14:51:32 +00:00 · 2018-07-19 14:51:32 +00:00 · 28023dbed7
parent ed2605d36d
commit 28023dbed7
11 changed files with 227 additions and 32 deletions
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@ -27,6 +27,7 @@ namespace llvm {
 class StringRef;
 class AAManager;
 class TargetMachine;
+class ModuleSummaryIndex;

 /// A struct capturing PGO tunables.
 struct PGOOptions {
@ -310,8 +311,9 @@ public:
  /// only intended for use when attempting to optimize code. If frontends
  /// require some transformations for semantic reasons, they should explicitly
  /// build them.
-  ModulePassManager buildThinLTODefaultPipeline(OptimizationLevel Level,
-                                                bool DebugLogging = false);
+  ModulePassManager
+  buildThinLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
+                              const ModuleSummaryIndex *ImportSummary);

  /// Build a pre-link, LTO-targeting default optimization pipeline to a pass
  /// manager.
@ -340,7 +342,8 @@ public:
  /// require some transformations for semantic reasons, they should explicitly
  /// build them.
  ModulePassManager buildLTODefaultPipeline(OptimizationLevel Level,
-                                            bool DebugLogging = false);
+                                            bool DebugLogging,
+                                            ModuleSummaryIndex *ExportSummary);

  /// Build the default `AAManager` with the default alias analysis pipeline
  /// registered.
--- a/llvm/include/llvm/Transforms/IPO/LowerTypeTests.h
+++ b/llvm/include/llvm/Transforms/IPO/LowerTypeTests.h
@ -26,6 +26,7 @@
 namespace llvm {

 class Module;
+class ModuleSummaryIndex;
 class raw_ostream;

 namespace lowertypetests {
@ -197,6 +198,11 @@ struct ByteArrayBuilder {

 class LowerTypeTestsPass : public PassInfoMixin<LowerTypeTestsPass> {
 public:
+  ModuleSummaryIndex *ExportSummary;
+  const ModuleSummaryIndex *ImportSummary;
+  LowerTypeTestsPass(ModuleSummaryIndex *ExportSummary,
+                     const ModuleSummaryIndex *ImportSummary)
+      : ExportSummary(ExportSummary), ImportSummary(ImportSummary) {}
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
 };

--- a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
+++ b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
@ -28,6 +28,7 @@ template <typename T> class ArrayRef;
 template <typename T> class MutableArrayRef;
 class Function;
 class GlobalVariable;
+class ModuleSummaryIndex;

 namespace wholeprogramdevirt {

@ -218,6 +219,13 @@ void setAfterReturnValues(MutableArrayRef<VirtualCallTarget> Targets,
 } // end namespace wholeprogramdevirt

 struct WholeProgramDevirtPass : public PassInfoMixin<WholeProgramDevirtPass> {
+  ModuleSummaryIndex *ExportSummary;
+  const ModuleSummaryIndex *ImportSummary;
+  WholeProgramDevirtPass(ModuleSummaryIndex *ExportSummary,
+                         const ModuleSummaryIndex *ImportSummary)
+      : ExportSummary(ExportSummary), ImportSummary(ImportSummary) {
+    assert(!(ExportSummary && ImportSummary));
+  }
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
 };

--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@ -144,7 +144,9 @@ createTargetMachine(Config &Conf, const Target *TheTarget, Module &M) {
 }

 static void runNewPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
-                           unsigned OptLevel, bool IsThinLTO) {
+                           unsigned OptLevel, bool IsThinLTO,
+                           ModuleSummaryIndex *ExportSummary,
+                           const ModuleSummaryIndex *ImportSummary) {
  Optional<PGOOptions> PGOOpt;
  if (!Conf.SampleProfile.empty())
    PGOOpt = PGOOptions("", "", Conf.SampleProfile, false, true);
@ -194,9 +196,10 @@ static void runNewPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
  }

  if (IsThinLTO)
-    MPM = PB.buildThinLTODefaultPipeline(OL, Conf.DebugPassManager);
+    MPM = PB.buildThinLTODefaultPipeline(OL, Conf.DebugPassManager,
+                                         ImportSummary);
  else
-    MPM = PB.buildLTODefaultPipeline(OL, Conf.DebugPassManager);
+    MPM = PB.buildLTODefaultPipeline(OL, Conf.DebugPassManager, ExportSummary);
  MPM.run(Mod, MAM);

  // FIXME (davide): verify the output.
@ -279,7 +282,8 @@ bool opt(Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
    runNewPMCustomPasses(Mod, TM, Conf.OptPipeline, Conf.AAPipeline,
                         Conf.DisableVerify);
  else if (Conf.UseNewPM)
-    runNewPMPasses(Conf, Mod, TM, Conf.OptLevel, IsThinLTO);
+    runNewPMPasses(Conf, Mod, TM, Conf.OptLevel, IsThinLTO, ExportSummary,
+                   ImportSummary);
  else
    runOldPMPasses(Conf, Mod, TM, IsThinLTO, ExportSummary, ImportSummary);
  return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod);
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@ -922,15 +922,28 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level,
  return MPM;
 }

-ModulePassManager
-PassBuilder::buildThinLTODefaultPipeline(OptimizationLevel Level,
-                                         bool DebugLogging) {
-  // FIXME: The summary index is not hooked in the new pass manager yet.
-  // When it's going to be hooked, enable WholeProgramDevirt and LowerTypeTest
-  // here.
-
+ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
+    OptimizationLevel Level, bool DebugLogging,
+    const ModuleSummaryIndex *ImportSummary) {
  ModulePassManager MPM(DebugLogging);

+  if (ImportSummary) {
+    // These passes import type identifier resolutions for whole-program
+    // devirtualization and CFI. They must run early because other passes may
+    // disturb the specific instruction patterns that these passes look for,
+    // creating dependencies on resolutions that may not appear in the summary.
+    //
+    // For example, GVN may transform the pattern assume(type.test) appearing in
+    // two basic blocks into assume(phi(type.test, type.test)), which would
+    // transform a dependency on a WPD resolution into a dependency on a type
+    // identifier resolution for CFI.
+    //
+    // Also, WPD has access to more precise information than ICP and can
+    // devirtualize more effectively, so it should operate on the IR first.
+    MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
+    MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
+  }
+
  // Force any function attributes we want the rest of the pipeline to observe.
  MPM.addPass(ForceFunctionAttrsPass());

@ -961,8 +974,9 @@ PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level,
  return buildPerModuleDefaultPipeline(Level, DebugLogging);
 }

-ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
-                                                       bool DebugLogging) {
+ModulePassManager
+PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
+                                     ModuleSummaryIndex *ExportSummary) {
  assert(Level != O0 && "Must request optimizations for the default pipeline!");
  ModulePassManager MPM(DebugLogging);

@ -1012,11 +1026,15 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,

  // Run whole program optimization of virtual call when the list of callees
  // is fixed.
-  MPM.addPass(WholeProgramDevirtPass());
+  MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));

  // Stop here at -O1.
-  if (Level == 1)
+  if (Level == 1) {
+    // The LowerTypeTestsPass needs to run to lower type metadata and the
+    // type.test intrinsics. The pass does nothing if CFI is disabled.
+    MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
    return MPM;
+  }

  // Optimize globals to try and fold them into constants.
  MPM.addPass(GlobalOptPass());
@ -1125,12 +1143,7 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
  // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
  // to be run at link time if CFI is enabled. This pass does nothing if
  // CFI is disabled.
-  // Enable once we add support for the summary in the new PM.
-#if 0
-  MPM.addPass(LowerTypeTestsPass(Summary ? PassSummaryAction::Export :
-                                           PassSummaryAction::None,
-                                Summary));
-#endif
+  MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));

  // Add late LTO optimization passes.
  // Delete basic blocks, which optimization passes may have killed.
@ -1442,12 +1455,12 @@ bool PassBuilder::parseModulePass(ModulePassManager &MPM,
    } else if (Matches[1] == "thinlto-pre-link") {
      MPM.addPass(buildThinLTOPreLinkDefaultPipeline(L, DebugLogging));
    } else if (Matches[1] == "thinlto") {
-      MPM.addPass(buildThinLTODefaultPipeline(L, DebugLogging));
+      MPM.addPass(buildThinLTODefaultPipeline(L, DebugLogging, nullptr));
    } else if (Matches[1] == "lto-pre-link") {
      MPM.addPass(buildLTOPreLinkDefaultPipeline(L, DebugLogging));
    } else {
      assert(Matches[1] == "lto" && "Not one of the matched options!");
-      MPM.addPass(buildLTODefaultPipeline(L, DebugLogging));
+      MPM.addPass(buildLTODefaultPipeline(L, DebugLogging, nullptr));
    }
    return true;
  }
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@ -56,7 +56,7 @@ MODULE_PASS("instrprof", InstrProfiling())
 MODULE_PASS("internalize", InternalizePass())
 MODULE_PASS("invalidate<all>", InvalidateAllAnalysesPass())
 MODULE_PASS("ipsccp", IPSCCPPass())
-MODULE_PASS("lowertypetests", LowerTypeTestsPass())
+MODULE_PASS("lowertypetests", LowerTypeTestsPass(nullptr, nullptr))
 MODULE_PASS("name-anon-globals", NameAnonGlobalPass())
 MODULE_PASS("no-op-module", NoOpModulePass())
 MODULE_PASS("partial-inliner", PartialInlinerPass())
@ -75,7 +75,7 @@ MODULE_PASS("rpo-functionattrs", ReversePostOrderFunctionAttrsPass())
 MODULE_PASS("sample-profile", SampleProfileLoaderPass())
 MODULE_PASS("strip-dead-prototypes", StripDeadPrototypesPass())
 MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation())
-MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass())
+MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass(nullptr, nullptr))
 MODULE_PASS("verify", VerifierPass())
 #undef MODULE_PASS

--- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@ -2102,9 +2102,7 @@ bool LowerTypeTestsModule::lower() {

 PreservedAnalyses LowerTypeTestsPass::run(Module &M,
                                          ModuleAnalysisManager &AM) {
-  bool Changed = LowerTypeTestsModule(M, /*ExportSummary=*/nullptr,
-                                      /*ImportSummary=*/nullptr)
-                     .lower();
+  bool Changed = LowerTypeTestsModule(M, ExportSummary, ImportSummary).lower();
  if (!Changed)
    return PreservedAnalyses::all();
  return PreservedAnalyses::none();
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@ -611,7 +611,8 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
  auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & {
    return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
  };
-  if (!DevirtModule(M, AARGetter, OREGetter, nullptr, nullptr).run())
+  if (!DevirtModule(M, AARGetter, OREGetter, ExportSummary, ImportSummary)
+           .run())
    return PreservedAnalyses::all();
  return PreservedAnalyses::none();
 }
--- a/llvm/test/Other/new-pm-lto-defaults.ll
+++ b/llvm/test/Other/new-pm-lto-defaults.ll
@ -54,6 +54,7 @@
 ; CHECK-O-NEXT: Running analysis: CallGraphAnalysis
 ; CHECK-O-NEXT: Running pass: GlobalSplitPass
 ; CHECK-O-NEXT: Running pass: WholeProgramDevirtPass
+; CHECK-O1-NEXT: Running pass: LowerTypeTestsPass
 ; CHECK-O2-NEXT: Running pass: GlobalOptPass
 ; CHECK-O2-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PromotePass>
 ; CHECK-O2-NEXT: Running analysis: DominatorTreeAnalysis
@ -82,6 +83,7 @@
 ; CHECK-O2-NEXT: Running analysis: MemoryDependenceAnalysis
 ; CHECK-O2-NEXT: Running analysis: DemandedBitsAnalysis
 ; CHECK-O2-NEXT: Running pass: CrossDSOCFIPass
+; CHECK-O2-NEXT: Running pass: LowerTypeTestsPass
 ; CHECK-O2-NEXT: Running pass: ModuleToFunctionPassAdaptor<{{.*}}SimplifyCFGPass>
 ; CHECK-O2-NEXT: Running pass: EliminateAvailableExternallyPass
 ; CHECK-O2-NEXT: Running pass: GlobalDCEPass
--- a/llvm/test/ThinLTO/X86/cfi-devirt.ll
+++ b/llvm/test/ThinLTO/X86/cfi-devirt.ll
@ -0,0 +1,100 @@
+; REQUIRES: x86-registered-target
+
+; Test CFI devirtualization through the thin link and backend.
+
+; RUN: opt -thinlto-bc -o %t.o %s
+
+; Legacy PM
+; RUN: llvm-lto2 run %t.o -save-temps \
+; RUN:   -o %t3 \
+; RUN:   -r=%t.o,test,px \
+; RUN:   -r=%t.o,_ZN1A1nEi,p \
+; RUN:   -r=%t.o,_ZN1B1fEi,p \
+; RUN:   -r=%t.o,_ZN1C1fEi,p \
+; RUN:   -r=%t.o,_ZTV1B, \
+; RUN:   -r=%t.o,_ZTV1C, \
+; RUN:   -r=%t.o,_ZN1A1nEi, \
+; RUN:   -r=%t.o,_ZN1B1fEi, \
+; RUN:   -r=%t.o,_ZN1C1fEi, \
+; RUN:   -r=%t.o,_ZTV1B,px \
+; RUN:   -r=%t.o,_ZTV1C,px
+; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
+
+; New PM
+; RUN: llvm-lto2 run %t.o -save-temps -use-new-pm \
+; RUN:   -o %t3 \
+; RUN:   -r=%t.o,test,px \
+; RUN:   -r=%t.o,_ZN1A1nEi,p \
+; RUN:   -r=%t.o,_ZN1B1fEi,p \
+; RUN:   -r=%t.o,_ZN1C1fEi,p \
+; RUN:   -r=%t.o,_ZTV1B, \
+; RUN:   -r=%t.o,_ZTV1C, \
+; RUN:   -r=%t.o,_ZN1A1nEi, \
+; RUN:   -r=%t.o,_ZN1B1fEi, \
+; RUN:   -r=%t.o,_ZN1C1fEi, \
+; RUN:   -r=%t.o,_ZTV1B,px \
+; RUN:   -r=%t.o,_ZTV1C,px
+; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+%struct.A = type { i32 (...)** }
+%struct.B = type { %struct.A }
+%struct.C = type { %struct.A }
+
+@_ZTV1B = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !1
+@_ZTV1C = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.C*, i32)* @_ZN1C1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !2
+
+; CHECK-IR-LABEL: define i32 @test
+define i32 @test(%struct.A* %obj, i32 %a) {
+entry:
+  %0 = bitcast %struct.A* %obj to i8**
+  %vtable5 = load i8*, i8** %0
+
+  %1 = tail call { i8*, i1 } @llvm.type.checked.load(i8* %vtable5, i32 8, metadata !"_ZTS1A")
+  %2 = extractvalue { i8*, i1 } %1, 1
+  br i1 %2, label %cont, label %trap
+
+trap:
+  tail call void @llvm.trap()
+  unreachable
+
+cont:
+  %3 = extractvalue { i8*, i1 } %1, 0
+  %4 = bitcast i8* %3 to i32 (%struct.A*, i32)*
+
+  ; Check that the call was devirtualized.
+  ; CHECK-IR: %call = tail call i32 @_ZN1A1nEi
+  %call = tail call i32 %4(%struct.A* nonnull %obj, i32 %a)
+  %vtable16 = load i8*, i8** %0
+  %5 = tail call { i8*, i1 } @llvm.type.checked.load(i8* %vtable16, i32 0, metadata !"_ZTS1A")
+  %6 = extractvalue { i8*, i1 } %5, 1
+  br i1 %6, label %cont2, label %trap
+
+cont2:
+  %7 = extractvalue { i8*, i1 } %5, 0
+  %8 = bitcast i8* %7 to i32 (%struct.A*, i32)*
+
+  ; Check that traps are conditional. Invalid TYPE_ID can cause
+  ; unconditional traps.
+  ; CHECK-IR: br i1 {{.*}}, label %trap
+
+  ; We still have to call it as virtual.
+  ; CHECK-IR: %call3 = tail call i32 %8
+  %call3 = tail call i32 %8(%struct.A* nonnull %obj, i32 %call)
+  ret i32 %call3
+}
+; CHECK-IR-LABEL: ret i32
+; CHECK-IR-LABEL: }
+
+declare { i8*, i1 } @llvm.type.checked.load(i8*, i32, metadata)
+declare void @llvm.trap()
+
+declare i32 @_ZN1B1fEi(%struct.B* %this, i32 %a)
+declare i32 @_ZN1A1nEi(%struct.A* %this, i32 %a)
+declare i32 @_ZN1C1fEi(%struct.C* %this, i32 %a)
+
+!0 = !{i64 16, !"_ZTS1A"}
+!1 = !{i64 16, !"_ZTS1B"}
+!2 = !{i64 16, !"_ZTS1C"}
--- a/llvm/test/ThinLTO/X86/cfi.ll
+++ b/llvm/test/ThinLTO/X86/cfi.ll
@ -0,0 +1,60 @@
+; REQUIRES: x86-registered-target
+
+; Test CFI through the thin link and backend.
+
+; RUN: opt -thinlto-bc -o %t.o %s
+
+; Legacy PM
+; RUN: llvm-lto2 run -save-temps %t.o \
+; RUN:   -o %t3 \
+; RUN:   -r=%t.o,test,px \
+; RUN:   -r=%t.o,_ZTV1B, \
+; RUN:   -r=%t.o,_ZN1B1fEi, \
+; RUN:   -r=%t.o,_ZTV1B,px
+; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
+
+; New PM
+; RUN: llvm-lto2 run -save-temps %t.o -use-new-pm \
+; RUN:   -o %t3 \
+; RUN:   -r=%t.o,test,px \
+; RUN:   -r=%t.o,_ZTV1B, \
+; RUN:   -r=%t.o,_ZN1B1fEi, \
+; RUN:   -r=%t.o,_ZTV1B,px
+; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+%struct.B = type { %struct.A }
+%struct.A = type { i32 (...)** }
+
+@_ZTV1B = constant { [3 x i8*] } { [3 x i8*] [i8* undef, i8* undef, i8* undef] }, !type !0
+
+; CHECK-IR-LABEL: define void @test
+define void @test(i8* %b) {
+entry:
+  ; Ensure that traps are conditional. Invalid TYPE_ID can cause
+  ; unconditional traps.
+  ; CHECK-IR: br i1 {{.*}}, label %trap
+  %0 = bitcast i8* %b to i8**
+  %vtable2 = load i8*, i8** %0
+  %1 = tail call i1 @llvm.type.test(i8* %vtable2, metadata !"_ZTS1A")
+  br i1 %1, label %cont, label %trap
+
+trap:
+  tail call void @llvm.trap()
+  unreachable
+
+cont:
+  ; CHECK-IR-LABEL: ret void
+  ret void
+}
+; CHECK-IR-LABEL: }
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.trap()
+
+declare i32 @_ZN1B1fEi(%struct.B* %this, i32 %a)
+
+!0 = !{i64 16, !"_ZTS1A"}
+!1 = !{i64 16, !"_ZTS1B"}