Add an option to save the backend-produced YAML optimization record to a file

The backend now has the capability to save information from optimizations, the
same information that can be used to generate optimization diagnostics but in
machine-consumable form, into an output file. This can be enabled when using
opt (see r282539), and this change enables it when using clang. The idea is
that other tools will be able to consume these files, and perhaps in
combination with the original source code, produce various kinds of
optimization reports for users (and for compiler developers).

We now have at-least two tools that can consume these files:
  * tools/llvm-opt-report
  * utils/opt-viewer

Using the flag -fsave-optimization-record will cause the YAML file to be
generated; the file name will be based on the output file name (if we're using
-c or -S and have an output name), or the input file name. When we're using
CUDA, or some other offloading mechanism, separate files are generated for each
backend target. The output file name can be specified by the user using
-foptimization-record-file=filename.

Differential Revision: https://reviews.llvm.org/D25225

llvm-svn: 283834
This commit is contained in:
Hal Finkel 2016-10-11 00:26:09 +00:00
parent 4043ca7394
commit 8f96e82cb8
9 changed files with 153 additions and 0 deletions

View File

@ -507,6 +507,9 @@ def arcmt_modify : Flag<["-"], "arcmt-modify">,
def arcmt_migrate : Flag<["-"], "arcmt-migrate">,
HelpText<"Apply modifications and produces temporary files that conform to ARC">;
def opt_record_file : Separate<["-"], "opt-record-file">,
HelpText<"File name to use for YAML optimization record output">;
def print_stats : Flag<["-"], "print-stats">,
HelpText<"Print performance metrics and statistics">;
def stats_file : Joined<["-"], "stats-file=">,

View File

@ -1192,6 +1192,15 @@ def ftemplate_backtrace_limit_EQ : Joined<["-"], "ftemplate-backtrace-limit=">,
Group<f_Group>;
def foperator_arrow_depth_EQ : Joined<["-"], "foperator-arrow-depth=">,
Group<f_Group>;
def fsave_optimization_record : Flag<["-"], "fsave-optimization-record">,
Group<f_Group>, HelpText<"Generate a YAML optimization record file">;
def fno_save_optimization_record : Flag<["-"], "fno-save-optimization-record">,
Group<f_Group>, Flags<[NoArgumentUnused]>;
def foptimization_record_file_EQ : Joined<["-"], "foptimization-record-file=">,
Group<f_Group>,
HelpText<"Specify the file name of any generated YAML optimization record">;
def ftest_coverage : Flag<["-"], "ftest-coverage">, Group<f_Group>;
def fvectorize : Flag<["-"], "fvectorize">, Group<f_Group>,
HelpText<"Enable the loop vectorization passes">;

View File

@ -181,6 +181,10 @@ public:
/// object file.
std::vector<std::string> CudaGpuBinaryFileNames;
/// The name of the file to which the backend should save YAML optimization
/// records.
std::string OptRecordFile;
/// Regular expression to select optimizations for which we should enable
/// optimization remarks. Transformation passes whose name matches this
/// expression (and support this feature), will emit a diagnostic

View File

@ -33,6 +33,8 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/YAMLTraits.h"
#include <memory>
using namespace clang;
using namespace llvm;
@ -181,6 +183,24 @@ namespace clang {
Ctx.setDiagnosticHandler(DiagnosticHandler, this);
Ctx.setDiagnosticHotnessRequested(CodeGenOpts.DiagnosticsWithHotness);
std::unique_ptr<llvm::tool_output_file> OptRecordFile;
if (!CodeGenOpts.OptRecordFile.empty()) {
std::error_code EC;
OptRecordFile =
llvm::make_unique<llvm::tool_output_file>(CodeGenOpts.OptRecordFile,
EC, sys::fs::F_None);
if (EC) {
Diags.Report(diag::err_cannot_open_file) <<
CodeGenOpts.OptRecordFile << EC.message();
return;
}
Ctx.setDiagnosticsOutputFile(new yaml::Output(OptRecordFile->os()));
if (CodeGenOpts.getProfileUse() != CodeGenOptions::ProfileNone)
Ctx.setDiagnosticHotnessRequested(true);
}
// Link LinkModule into this module if present, preserving its validity.
for (auto &I : LinkModules) {
unsigned LinkFlags = I.first;
@ -198,6 +218,9 @@ namespace clang {
Ctx.setInlineAsmDiagnosticHandler(OldHandler, OldContext);
Ctx.setDiagnosticHandler(OldDiagnosticHandler, OldDiagnosticContext);
if (OptRecordFile)
OptRecordFile->keep();
}
void HandleTagDeclDefinition(TagDecl *D) override {

View File

@ -6080,6 +6080,39 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-fno-math-builtin");
}
if (Args.hasFlag(options::OPT_fsave_optimization_record,
options::OPT_fno_save_optimization_record, false)) {
CmdArgs.push_back("-opt-record-file");
const Arg *A = Args.getLastArg(options::OPT_foptimization_record_file_EQ);
if (A) {
CmdArgs.push_back(A->getValue());
} else {
SmallString<128> F;
if (Output.isFilename() && (Args.hasArg(options::OPT_c) ||
Args.hasArg(options::OPT_S))) {
F = Output.getFilename();
} else {
// Use the compilation directory.
F = llvm::sys::path::stem(Input.getBaseInput());
// If we're compiling for an offload architecture (i.e. a CUDA device),
// we need to make the file name for the device compilation different
// from the host compilation.
if (!JA.isDeviceOffloading(Action::OFK_None) &&
!JA.isDeviceOffloading(Action::OFK_Host)) {
llvm::sys::path::replace_extension(F, "");
F += JA.getOffloadingFileNamePrefix(Triple.normalize());
F += "-";
F += JA.getOffloadingArch();
}
}
llvm::sys::path::replace_extension(F, "opt.yaml");
CmdArgs.push_back(Args.MakeArgString(F));
}
}
// Default to -fno-builtin-str{cat,cpy} on Darwin for ARM.
//
// FIXME: Now that PR4941 has been fixed this can be enabled.

View File

@ -826,6 +826,10 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
Opts.LinkerOptions = Args.getAllArgValues(OPT_linker_option);
bool NeedLocTracking = false;
Opts.OptRecordFile = Args.getLastArgValue(OPT_opt_record_file);
if (!Opts.OptRecordFile.empty())
NeedLocTracking = true;
if (Arg *A = Args.getLastArg(OPT_Rpass_EQ)) {
Opts.OptimizationRemarkPattern =
GenerateOptimizationRemarkRegex(Diags, Args, A);

View File

@ -0,0 +1,26 @@
foo
# Func Hash:
0
# Num Counters:
1
# Counter Values:
30
bar
# Func Hash:
0
# Num Counters:
1
# Counter Values:
30
Test
# Func Hash:
269
# Num Counters:
3
# Counter Values:
1
30
15

View File

@ -0,0 +1,33 @@
// RUN: %clang_cc1 -O3 -triple x86_64-unknown-linux-gnu -target-cpu x86-64 %s -o %t -dwarf-column-info -opt-record-file %t.yaml -emit-obj
// RUN: cat %t.yaml | FileCheck %s
// RUN: llvm-profdata merge %S/Inputs/opt-record.proftext -o %t.profdata
// RUN: %clang_cc1 -O3 -triple x86_64-unknown-linux-gnu -target-cpu x86-64 -fprofile-instrument-use-path=%t.profdata %s -o %t -dwarf-column-info -opt-record-file %t.yaml -emit-obj
// RUN: cat %t.yaml | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PGO %s
// REQUIRES: x86-registered-target
void bar();
void foo() { bar(); }
void Test(int *res, int *c, int *d, int *p, int n) {
int i;
#pragma clang loop vectorize(assume_safety)
for (i = 0; i < 1600; i++) {
res[i] = (p[i] == 0) ? res[i] : res[i] + d[i];
}
}
// CHECK: --- !Missed
// CHECK: Pass: inline
// CHECK: Name: NoDefinition
// CHECK: DebugLoc:
// CHECK: Function: foo
// CHECK-PGO: Hotness:
// CHECK: --- !Passed
// CHECK: Pass: loop-vectorize
// CHECK: Name: Vectorized
// CHECK: DebugLoc:
// CHECK: Function: Test
// CHECK-PGO: Hotness:

View File

@ -0,0 +1,18 @@
// RUN: %clang -### -S -o FOO -fsave-optimization-record %s 2>&1 | FileCheck %s
// RUN: %clang -### -c -o FOO -fsave-optimization-record %s 2>&1 | FileCheck %s
// RUN: %clang -### -c -fsave-optimization-record %s 2>&1 | FileCheck %s -check-prefix=CHECK-NO-O
// RUN: %clang -### -fsave-optimization-record %s 2>&1 | FileCheck %s -check-prefix=CHECK-NO-O
// RUN: %clang -### -S -fsave-optimization-record -x cuda -nocudainc -nocudalib %s 2>&1 | FileCheck %s -check-prefix=CHECK-NO-O -check-prefix=CHECK-CUDA-DEV
// RUN: %clang -### -fsave-optimization-record -x cuda -nocudainc -nocudalib %s 2>&1 | FileCheck %s -check-prefix=CHECK-NO-O -check-prefix=CHECK-CUDA-DEV
// RUN: %clang -### -S -o FOO -fsave-optimization-record -foptimization-record-file=BAR.txt %s 2>&1 | FileCheck %s -check-prefix=CHECK-EQ
// CHECK: "-cc1"
// CHECK: "-opt-record-file" "FOO.opt.yaml"
// CHECK-NO-O: "-cc1"
// CHECK-NO-O-DAG: "-opt-record-file" "opt-record.opt.yaml"
// CHECK-CUDA-DEV-DAG: "-opt-record-file" "opt-record-device-cuda-nvptx64-nvidia-cuda-sm_20.opt.yaml"
// CHECK-EQ: "-cc1"
// CHECK-EQ: "-opt-record-file" "BAR.txt"