[BOLT] Add aarch64 support for peephole passes

Enable peephole optimizations for aarch64.
Also includes a small refactoring: the PeepholeOpts enum is moved into the Peepholes class.

Vladislav Khmelevsky,
Advanced Software Technology Lab, Huawei

Differential Revision: https://reviews.llvm.org/D118732
This commit is contained in:
Vladislav Khmelevsky 2022-02-01 23:41:07 +03:00
parent 2c26cfdef7
commit 19fb5a210d
6 changed files with 130 additions and 33 deletions

View File

@ -295,6 +295,16 @@ public:
/// Perform simple peephole optimizations.
class Peepholes : public BinaryFunctionPass {
public:
/// Bit flags selecting individual peephole optimizations. The values are
/// distinct bits so several selections can be OR-ed into a single mask.
enum PeepholeOpts : char {
  PEEP_NONE = 0,                  ///< Disable peepholes.
  PEEP_DOUBLE_JUMPS = 1 << 1,     ///< Remove double jumps when able.
  PEEP_TAILCALL_TRAPS = 1 << 2,   ///< Insert tail call traps.
  PEEP_USELESS_BRANCHES = 1 << 3, ///< Remove useless conditional branches.
  PEEP_ALL = 0xf                  ///< Enable all peephole optimizations.
};
private:
uint64_t NumDoubleJumps{0};
uint64_t TailCallTraps{0};
uint64_t NumUselessCondBranches{0};

View File

@ -105,29 +105,19 @@ MinBranchClusters("min-branch-clusters",
cl::Hidden,
cl::cat(BoltOptCategory));
enum PeepholeOpts : char {
PEEP_NONE = 0x0,
PEEP_DOUBLE_JUMPS = 0x2,
PEEP_TAILCALL_TRAPS = 0x4,
PEEP_USELESS_BRANCHES = 0x8,
PEEP_ALL = 0xf
};
static cl::list<PeepholeOpts>
Peepholes("peepholes",
cl::CommaSeparated,
cl::desc("enable peephole optimizations"),
cl::value_desc("opt1,opt2,opt3,..."),
cl::values(
clEnumValN(PEEP_NONE, "none", "disable peepholes"),
clEnumValN(PEEP_DOUBLE_JUMPS, "double-jumps",
"remove double jumps when able"),
clEnumValN(PEEP_TAILCALL_TRAPS, "tailcall-traps", "insert tail call traps"),
clEnumValN(PEEP_USELESS_BRANCHES, "useless-branches",
"remove useless conditional branches"),
clEnumValN(PEEP_ALL, "all", "enable all peephole optimizations")),
cl::ZeroOrMore,
cl::cat(BoltOptCategory));
// Command-line option "peepholes": a comma-separated list naming the peephole
// optimizations to enable. Each accepted spelling ("none", "double-jumps",
// "tailcall-traps", "useless-branches", "all") maps to the corresponding
// Peepholes::PeepholeOpts enumerator.
static cl::list<Peepholes::PeepholeOpts> Peepholes(
"peepholes", cl::CommaSeparated, cl::desc("enable peephole optimizations"),
cl::value_desc("opt1,opt2,opt3,..."),
cl::values(clEnumValN(Peepholes::PEEP_NONE, "none", "disable peepholes"),
clEnumValN(Peepholes::PEEP_DOUBLE_JUMPS, "double-jumps",
"remove double jumps when able"),
clEnumValN(Peepholes::PEEP_TAILCALL_TRAPS, "tailcall-traps",
"insert tail call traps"),
clEnumValN(Peepholes::PEEP_USELESS_BRANCHES, "useless-branches",
"remove useless conditional branches"),
clEnumValN(Peepholes::PEEP_ALL, "all",
"enable all peephole optimizations")),
cl::ZeroOrMore, cl::cat(BoltOptCategory));
static cl::opt<unsigned>
PrintFuncStat("print-function-statistics",
@ -1092,20 +1082,20 @@ void Peepholes::removeUselessCondBranches(BinaryFunction &Function) {
}
void Peepholes::runOnFunctions(BinaryContext &BC) {
const char Opts = std::accumulate(
opts::Peepholes.begin(), opts::Peepholes.end(), 0,
[](const char A, const opts::PeepholeOpts B) { return A | B; });
if (Opts == opts::PEEP_NONE || !BC.isX86())
const char Opts =
std::accumulate(opts::Peepholes.begin(), opts::Peepholes.end(), 0,
[](const char A, const PeepholeOpts B) { return A | B; });
if (Opts == PEEP_NONE)
return;
for (auto &It : BC.getBinaryFunctions()) {
BinaryFunction &Function = It.second;
if (shouldOptimize(Function)) {
if (Opts & opts::PEEP_DOUBLE_JUMPS)
if (Opts & PEEP_DOUBLE_JUMPS)
NumDoubleJumps += fixDoubleJumps(Function, false);
if (Opts & opts::PEEP_TAILCALL_TRAPS)
if (Opts & PEEP_TAILCALL_TRAPS)
addTailcallTraps(Function);
if (Opts & opts::PEEP_USELESS_BRANCHES)
if (Opts & PEEP_USELESS_BRANCHES)
removeUselessCondBranches(Function);
assert(Function.validateCFG());
}

View File

@ -798,6 +798,13 @@ public:
createShortJmp(Seq, Target, Ctx, /*IsTailCall*/ true);
}
/// Rewrite \p Inst in place as a trap: the instruction is reset with clear(),
/// given the AArch64 BRK opcode, and a single immediate operand of 1.
/// Always returns true.
bool createTrap(MCInst &Inst) const override {
Inst.clear();
Inst.setOpcode(AArch64::BRK);
// BRK takes one immediate operand; this trap uses the value 1.
Inst.addOperand(MCOperand::createImm(1));
return true;
}
bool convertJmpToTailCall(MCInst &Inst) override {
setTailCall(Inst);
return true;

View File

@ -0,0 +1,55 @@
// A contrived example to test the double jump removal peephole.
// RUN: %clang %cflags -O0 %s -o %t.exe
// RUN: llvm-bolt %t.exe -o %t.bolt -peepholes=double-jumps | \
// RUN: FileCheck %s -check-prefix=CHECKBOLT
// RUN: llvm-objdump -d %t.bolt | FileCheck %s
// CHECKBOLT: BOLT-INFO: Peephole: 1 double jumps patched.
// CHECK: <_Z3foom>:
// CHECK-NEXT: sub sp, sp, #16
// CHECK-NEXT: str x0, [sp, #8]
// CHECK-NEXT: ldr [[REG:x[0-9]+]], [sp, #8]
// CHECK-NEXT: cmp [[REG]], #0
// CHECK-NEXT: b.eq {{.*}} <_Z3foom+0x34>
// CHECK-NEXT: add [[REG]], [[REG]], #1
// CHECK-NEXT: add [[REG]], [[REG]], #1
// CHECK-NEXT: cmp [[REG]], #2
// CHECK-NEXT: b.eq {{.*}} <_Z3foom+0x28>
// CHECK-NEXT: add [[REG]], [[REG]], #1
// CHECK-NEXT: mov [[REG]], x1
// CHECK-NEXT: ldr x1, [sp]
// CHECK-NEXT: b {{.*}} <bar>
// CHECK-NEXT: ldr x1, [sp]
// CHECK-NEXT: add [[REG]], [[REG]], #1
// CHECK-NEXT: b {{.*}} <bar>
// Callee reached from foo's final `b bar`; returns its argument plus one.
extern "C" unsigned long bar(unsigned long count) {
  return 1 + count;
}
// Hand-written control flow for the double-jump peephole. The asm contains
// branches that land directly on another unconditional branch:
//   `b .L1`    -> `.L1: b .L2`
//   `b.ne .L3` -> `.L3: b .L5`
//   `b .L6`    -> `.L6: b bar`
// Every path through the asm ends at `b bar`, so control leaves through that
// branch rather than by falling out of the asm statement.
// NOTE(review): `count` is bound as an input-only "r" operand although the asm
// writes %0, and x1 is written without appearing in the clobber list. This is
// presumably tolerated only because the fixture is contrived and built at -O0;
// confirm before reusing the pattern elsewhere.
unsigned long foo(unsigned long count) {
asm volatile(" cmp %0,#0\n"
" b.eq .L7\n"
" add %0, %0, #1\n"
" b .L1\n"
".L1: b .L2\n"
".L2: add %0, %0, #1\n"
" cmp %0, #2\n"
" b.ne .L3\n"
" b .L4\n"
".L3: b .L5\n"
".L5: add %0, %0, #1\n"
".L4: mov %0,x1\n"
" ldr x1, [sp]\n"
" b .L6\n"
".L7: ldr x1, [sp]\n"
" add %0, %0, #1\n"
" b .L6\n"
".L6: b bar\n"
:
: "r"(count)
:);
return count;
}
// Entry point: calls foo with a fixed non-zero argument and returns the result
// as the process exit status. argc/argv are unused.
int main(int argc, const char *argv[]) { return foo(38); }

View File

@ -0,0 +1,37 @@
## Tests the peephole that adds trap instructions following indirect tail calls.
# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown \
# RUN: %s -o %t.o
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q
# RUN: llvm-bolt %t.exe -o %t.bolt -peepholes=tailcall-traps \
# RUN: -print-peepholes -funcs=foo,bar 2>&1 | FileCheck %s
# CHECK: Binary Function "foo"
# CHECK: br x0 # TAILCALL
# CHECK-NEXT: brk #0x1
# CHECK: End of Function "foo"
# CHECK: Binary Function "bar"
# CHECK: b foo # TAILCALL
# CHECK: End of Function "bar"
.text
.align 4
## main: a two-instruction stub (nop + ret). It is not among the functions
## selected with -funcs; presumably it exists only so the executable links.
.global main
.type main, %function
main:
nop
ret
.size main, .-main
## foo: consists solely of an indirect branch through x0. The expected output
## shows it annotated as a tail call and immediately followed by a brk.
.global foo
.type foo, %function
foo:
br x0
.size foo, .-foo
## bar: consists solely of a direct branch to foo. The expected output shows it
## annotated as a tail call.
.global bar
.type bar, %function
bar:
b foo
.size bar, .-bar

View File

@ -1,6 +1,4 @@
/*
* A contrived example to test the double jump removal peephole.
*/
// A contrived example to test the double jump removal peephole.
extern "C" unsigned long bar(unsigned long count) {
return count + 1;