[asan_symbolize] Add a simple plugin architecture

Summary:
This change adds a simple plugin architecture to `asan_symbolize.py`.
The motivation here is that sometimes it's necessary to perform extra
work to figure out where binaries with debug symbols can actually be
found. For example, a remote service might need to be queried for
binaries, which then have to be copied to the local system.

This "extra work" can be extremely site-specific, such that adding the
code directly into `asan_symbolize.py` would just clutter the code
for a very niche use case. To avoid this, `asan_symbolize.py` can
now load external code via a new `--plugins` command line option.

These plugins are loaded before main command line argument parsing so
that they can add their own command line options.

Right now the only hook into the behaviour of symbolization is the
`filter_binary_path()` function which assumes a very similar role
to the `binary_name_filter` function that was previously in the code.
We can add more hooks as necessary.

Code in the `asan_symbolize.py` script does not call plugin code
directly. Instead it uses an `AsanSymbolizerPlugInProxy` object.
This object:

* Loads plugins from files.
* Manages the lifetime of the plugins.
* Provides an interface for calling into plugin functions and handles
  calling into multiple plugins.

To unify the way binary paths are filtered the old `sysroot_path_filter`
function (and associated code) has been turned into a simple plugin
(`SysRootFilterPlugIn`) that is always loaded. The plugin unloads
itself if the `-s` option is not present on the command line. Users
should not see any functional change relating to this command line
option.

Some simple tests are provided to illustrate what plugin code looks
like and also to check the functionality continues to work.

rdar://problem/49476995

Reviewers: kubamracek, yln, samsonov, dvyukov, vitalybuka

Subscribers: srhines, #sanitizers, llvm-commits

Tags: #llvm, #sanitizers

Differential Revision: https://reviews.llvm.org/D60529

llvm-svn: 358657
This commit is contained in:
Dan Liew 2019-04-18 11:34:31 +00:00
parent a630b34057
commit 403d3187a7
4 changed files with 283 additions and 48 deletions

View File

@ -9,6 +9,15 @@
"""
Example of use:
asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" -s "$HOME/SymbolFiles" < asan.log
PLUGINS
This script provides a way for external plug-ins to hook into the behaviour of
various parts of this script (see `--plugins`). This is useful for situations
where it is necessary to handle site-specific quirks (e.g. binaries with debug
symbols only accessible via a remote service) without having to modify the
script itself.
"""
import argparse
import bisect
@ -22,8 +31,6 @@ import sys
symbolizers = {}
demangle = False
binutils_prefix = None
sysroot_path = None
binary_name_filter = None
fix_filename_patterns = None
logfile = sys.stdin
allow_system_symbolizer = True
@ -38,9 +45,6 @@ def fix_filename(file_name):
file_name = re.sub('.*crtstuff.c:0', '???:0', file_name)
return file_name
def sysroot_path_filter(binary_name):
  """Return `binary_name` prefixed with the module-level `sysroot_path`."""
  prefixed_name = sysroot_path + binary_name
  return prefixed_name
def is_valid_arch(s):
  """Return True iff `s` is one of the architecture names listed below."""
  known_archs = (
      "i386", "x86_64", "x86_64h",
      "arm", "armv6", "armv7", "armv7s", "armv7k", "arm64",
      "powerpc64", "powerpc64le",
      "s390x", "s390",
  )
  return s in known_archs
@ -361,7 +365,8 @@ class BreakpadSymbolizer(Symbolizer):
class SymbolizationLoop(object):
def __init__(self, binary_name_filter=None, dsym_hint_producer=None):
def __init__(self, plugin_proxy=None, dsym_hint_producer=None):
self.plugin_proxy = plugin_proxy
if sys.platform == 'win32':
# ASan on Windows uses dbghelp.dll to symbolize in-process, which works
# even in sandboxed processes. Nothing needs to be done here.
@ -369,7 +374,6 @@ class SymbolizationLoop(object):
else:
# Used by clients who may want to supply a different binary name.
# E.g. in Chrome several binaries may share a single .dSYM.
self.binary_name_filter = binary_name_filter
self.dsym_hint_producer = dsym_hint_producer
self.system = os.uname()[0]
if self.system not in ['Linux', 'Darwin', 'FreeBSD', 'NetBSD','SunOS']:
@ -469,14 +473,183 @@ class SymbolizationLoop(object):
# Assume that frame #0 is the first frame of new stack trace.
self.frame_no = 0
original_binary = binary
if self.binary_name_filter:
binary = self.binary_name_filter(binary)
binary = self.plugin_proxy.filter_binary_path(binary)
if binary is None:
# The binary filter has told us this binary can't be symbolized.
logging.debug('Skipping symbolication of binary "%s"', original_binary)
return [self.current_line]
symbolized_line = self.symbolize_address(addr, binary, offset, arch)
if not symbolized_line:
if original_binary != binary:
symbolized_line = self.symbolize_address(addr, original_binary, offset, arch)
return self.get_symbolized_lines(symbolized_line)
class AsanSymbolizerPlugInProxy(object):
  """
  Serves several purposes:
  - Manages the lifetime of plugins (must be used in a `with` statement).
  - Provides interface for calling into plugins from within this script.
  """
  def __init__(self):
    # Managed plugins in registration order; filter hooks are chained in
    # this order.
    self._plugins = [ ]
    # Names (as returned by `get_name()`) of the plugins in `_plugins`.
    self._plugin_names = set()

  def load_plugin_from_file(self, file_path):
    """
    Execute the Python file at `file_path` so that it can register plugins.

    The file is executed with a copy of this module's globals plus a
    `register_plugin(plugin)` function that adds `plugin` to this proxy.
    """
    logging.info('Loading plugins from "{}"'.format(file_path))
    globals_space = dict(globals())
    # Provide function to register plugins
    def register_plugin(plugin):
      logging.info('Registering plugin %s', plugin.get_name())
      self.add_plugin(plugin)
    globals_space['register_plugin'] = register_plugin
    # NOTE: the plugin file is executed as ordinary Python code in this
    # process, so only trusted files should be passed here.
    if sys.version_info.major < 3:
      execfile(file_path, globals_space, None)
    else:
      with open(file_path, 'r') as f:
        # Compile with the real file name so that tracebacks raised from
        # plugin code point at the plugin file rather than "<string>".
        code = compile(f.read(), file_path, 'exec')
      exec(code, globals_space, None)

  def add_plugin(self, plugin):
    """
    Start managing `plugin` and hand it a reference to this proxy.
    """
    assert isinstance(plugin, AsanSymbolizerPlugIn)
    self._plugins.append(plugin)
    self._plugin_names.add(plugin.get_name())
    plugin._receive_proxy(self)

  def remove_plugin(self, plugin):
    """
    Stop managing `plugin` and call its `destroy()` hook.
    """
    assert isinstance(plugin, AsanSymbolizerPlugIn)
    self._plugins.remove(plugin)
    name = plugin.get_name()
    # Several managed plugins may report the same name. Only forget the
    # name once the last plugin using it has been removed.
    if not any(other.get_name() == name for other in self._plugins):
      self._plugin_names.discard(name)
    logging.debug('Removing plugin %s', name)
    plugin.destroy()

  def has_plugin(self, name):
    """
    Returns true iff the plugin name is currently
    being managed by AsanSymbolizerPlugInProxy.
    """
    return name in self._plugin_names

  def register_cmdline_args(self, parser):
    """
    Let every managed plugin add its own options to `parser`.
    """
    plugins = list(self._plugins)
    for plugin in plugins:
      plugin.register_cmdline_args(parser)

  def process_cmdline_args(self, pargs):
    """
    Pass the parsed arguments `pargs` to every managed plugin and remove
    the plugins whose `process_cmdline_args()` returns `False`.
    """
    # Use copy so we can remove items as we iterate.
    plugins = list(self._plugins)
    for plugin in plugins:
      keep = plugin.process_cmdline_args(pargs)
      assert isinstance(keep, bool)
      if not keep:
        self.remove_plugin(plugin)

  def __enter__(self):
    return self

  def __exit__(self, exc_type, exc_val, exc_tb):
    try:
      # Iterate over a copy so the list can be emptied afterwards.
      for plugin in list(self._plugins):
        plugin.destroy()
    finally:
      # Destroyed plugins must not stay in the filter chain and must not be
      # destroyed a second time.
      del self._plugins[:]
      self._plugin_names.clear()
    # Don't suppress raised exceptions
    return False

  def _filter_single_value(self, function_name, input_value):
    """
    Helper for filter style plugin functions.

    Calls `function_name` on each managed plugin in turn, feeding the result
    of one plugin into the next. Stops and returns `None` as soon as a
    plugin returns `None`.
    """
    new_value = input_value
    for plugin in self._plugins:
      result = getattr(plugin, function_name)(new_value)
      if result is None:
        return None
      new_value = result
    return new_value

  def filter_binary_path(self, binary_path):
    """
    Consult available plugins to filter the path to a binary
    to make it suitable for symbolication.
    Returns `None` if symbolication should not be attempted for this
    binary.
    """
    return self._filter_single_value('filter_binary_path', binary_path)
class AsanSymbolizerPlugIn(object):
  """
  Base class for plugins. The `asan_symbolize.py` code talks to plugins
  through the methods declared here; subclasses override the hooks they
  care about.
  """

  @classmethod
  def get_name(cls):
    """
    Name of the plugin, which is the name of its class.
    """
    plugin_name = cls.__name__
    return plugin_name

  def _receive_proxy(self, proxy):
    # Remember the proxy handed to this plugin as `self.proxy`.
    assert isinstance(proxy, AsanSymbolizerPlugInProxy)
    self.proxy = proxy

  def register_cmdline_args(self, parser):
    """
    Hook that lets the plugin add command line options, which it can later
    read in `process_cmdline_args()`. Does nothing by default.

    `parser` - Instance of `argparse.ArgumentParser`.
    """
    return None

  def process_cmdline_args(self, pargs):
    """
    Hook that receives the parsed command line. `pargs` should be treated
    as read-only by implementations.

    `pargs` - Instance of `argparse.Namespace` containing
              parsed command line arguments.

    The return value says whether the plug-in should be used: `True` to
    use it, `False` otherwise. The default is `True`.
    """
    return True

  def destroy(self):
    """
    Hook invoked when the plugin is about to be destroyed. Resources the
    plugin allocated should be released here. Does nothing by default.
    """
    return None

  # Symbolization hooks
  def filter_binary_path(self, binary_path):
    """
    Map `binary_path` to the path that should be used for symbolication.
    Return `None` to skip symbolication of this binary. The default
    returns `binary_path` unchanged.
    """
    return binary_path
class SysRootFilterPlugIn(AsanSymbolizerPlugIn):
  """
  Plug-in that prepends a sys root prefix to every binary path
  used for symbolication.
  """

  def __init__(self):
    # Prefix added by `filter_binary_path()`; empty until `-s` is processed.
    self.sysroot_path = ""

  def register_cmdline_args(self, parser):
    # Adds the `-s SYSROOT` option; its value is stored as `sys_root`.
    parser.add_argument(
        '-s',
        metavar='SYSROOT',
        dest='sys_root',
        help='set path to sysroot for sanitized binaries')

  def process_cmdline_args(self, pargs):
    requested_root = pargs.sys_root
    if requested_root is not None:
      self.sysroot_path = requested_root
      return True
    # Not being used so remove ourselves.
    return False

  def filter_binary_path(self, path):
    prefixed_path = self.sysroot_path + path
    return prefixed_path
def add_logging_args(parser):
parser.add_argument('--log-dest',
default=None,
@ -515,45 +688,59 @@ def setup_logging():
)
return unparsed_args
def add_load_plugin_args(parser):
  """
  Add the `-p`/`--plugins` option to `parser`. The option takes one or more
  values and defaults to an empty list.
  """
  option_flags = ('-p', '--plugins')
  parser.add_argument(*option_flags, nargs='+', default=[],
                      help='Load plug-in')
def setup_plugins(plugin_proxy, args):
  """
  Load the plugins requested on the command line into `plugin_proxy`.

  `plugin_proxy` - Object that plugins are loaded into. It must provide
                   `load_plugin_from_file()` and `add_plugin()`.
  `args` - List of command line arguments to scan for `-p`/`--plugins`.
           If `None`, `argparse` falls back to `sys.argv[1:]`.

  Returns the list of arguments that were not consumed here.
  """
  parser = argparse.ArgumentParser(add_help=False)
  add_load_plugin_args(parser)
  # Parse the arguments handed to us rather than implicitly re-reading
  # `sys.argv`, so that the returned leftovers are derived from `args`.
  pargs, unparsed_args = parser.parse_known_args(args)
  for plugin_path in pargs.plugins:
    plugin_proxy.load_plugin_from_file(plugin_path)
  # Add built-in plugins.
  plugin_proxy.add_plugin(SysRootFilterPlugIn())
  return unparsed_args
if __name__ == '__main__':
remaining_args = setup_logging()
parser = argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter,
description='ASan symbolization script',
epilog=__doc__)
parser.add_argument('path_to_cut', nargs='*',
help='pattern to be cut from the result file path ')
parser.add_argument('-d','--demangle', action='store_true',
help='demangle function names')
parser.add_argument('-s', metavar='SYSROOT',
help='set path to sysroot for sanitized binaries')
parser.add_argument('-c', metavar='CROSS_COMPILE',
help='set prefix for binutils')
parser.add_argument('-l','--logfile', default=sys.stdin,
type=argparse.FileType('r'),
help='set log file name to parse, default is stdin')
parser.add_argument('--force-system-symbolizer', action='store_true',
help='don\'t use llvm-symbolizer')
# Add logging arguments so that `--help` shows them.
add_logging_args(parser)
args = parser.parse_args(remaining_args)
if args.path_to_cut:
fix_filename_patterns = args.path_to_cut
if args.demangle:
demangle = True
if args.s:
binary_name_filter = sysroot_path_filter
sysroot_path = args.s
if args.c:
binutils_prefix = args.c
if args.logfile:
logfile = args.logfile
else:
logfile = sys.stdin
if args.force_system_symbolizer:
force_system_symbolizer = True
if force_system_symbolizer:
assert(allow_system_symbolizer)
loop = SymbolizationLoop(binary_name_filter)
loop.process_logfile()
with AsanSymbolizerPlugInProxy() as plugin_proxy:
remaining_args = setup_plugins(plugin_proxy, remaining_args)
parser = argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter,
description='ASan symbolization script',
epilog=__doc__)
parser.add_argument('path_to_cut', nargs='*',
help='pattern to be cut from the result file path ')
parser.add_argument('-d','--demangle', action='store_true',
help='demangle function names')
parser.add_argument('-c', metavar='CROSS_COMPILE',
help='set prefix for binutils')
parser.add_argument('-l','--logfile', default=sys.stdin,
type=argparse.FileType('r'),
help='set log file name to parse, default is stdin')
parser.add_argument('--force-system-symbolizer', action='store_true',
help='don\'t use llvm-symbolizer')
# Add logging arguments so that `--help` shows them.
add_logging_args(parser)
# Add load plugin arguments so that `--help` shows them.
add_load_plugin_args(parser)
plugin_proxy.register_cmdline_args(parser)
args = parser.parse_args(remaining_args)
plugin_proxy.process_cmdline_args(args)
if args.path_to_cut:
fix_filename_patterns = args.path_to_cut
if args.demangle:
demangle = True
if args.c:
binutils_prefix = args.c
if args.logfile:
logfile = args.logfile
else:
logfile = sys.stdin
if args.force_system_symbolizer:
force_system_symbolizer = True
if force_system_symbolizer:
assert(allow_system_symbolizer)
loop = SymbolizationLoop(plugin_proxy)
loop.process_logfile()

View File

@ -0,0 +1,17 @@
class NoOpPlugin(AsanSymbolizerPlugIn):
  """
  Plugin that logs a message from each hook and leaves binary paths
  unchanged.
  """

  def register_cmdline_args(self, parser):
    logging.info('Adding --unlikely-option-name-XXX option')
    parser.add_argument('--unlikely-option-name-XXX', default=0, type=int)

  def process_cmdline_args(self, pargs):
    option_value = pargs.unlikely_option_name_XXX
    logging.info('GOT --unlikely-option-name-XXX=%d', option_value)
    # Always stay loaded.
    return True

  def destroy(self):
    logging.info('destroy() called on NoOpPlugin')

  def filter_binary_path(self, path):
    logging.info('filter_binary_path called in NoOpPlugin')
    return path

# `register_plugin` is not defined in this file; it has to be provided by
# the code that executes it.
register_plugin(NoOpPlugin())

View File

@ -0,0 +1,7 @@
// Check help output.
// RUN: %asan_symbolize --log-level info --plugins %S/plugin_no_op.py --help 2>&1 | FileCheck %s
// CHECK: Registering plugin NoOpPlugin
// CHECK: Adding --unlikely-option-name-XXX option
// CHECK: optional arguments:
// CHECK: --unlikely-option-name-XXX

View File

@ -0,0 +1,24 @@
// UNSUPPORTED: ios, android
// Check plugin command line args get parsed and that plugin functions get called as expected.
// RUN: %clangxx_asan -O0 -g %s -o %t.executable
// RUN: not %env_asan_opts=symbolize=0 %run %t.executable > %t.log 2>&1
// RUN: %asan_symbolize --plugins %S/plugin_no_op.py --log-level info -l %t.log --unlikely-option-name-XXX=15 2>&1 | FileCheck %s
// CHECK: GOT --unlikely-option-name-XXX=15
// CHECK: filter_binary_path called in NoOpPlugin
// CHECK: destroy() called on NoOpPlugin
#include <cstdlib>
// Stores 5 through the pointer `a`.
extern "C" void foo(int* a) {
*a = 5;
}
// Allocates an int, frees it, and then passes the freed pointer to foo().
int main() {
int* a = (int*) malloc(sizeof(int));
// Nothing to do if the allocation failed.
if (!a)
return 0;
free(a);
// `a` has already been freed at this point; foo() writes through it.
foo(a);
return 0;
}