clang-format: Add an option to git-clang-format to diff between to commits

Summary:
When building pre-upload hooks using git-clang-format, it is useful to limit the scope to a diff of two commits (instead of from a commit against the working tree) to allow for less false positives in dependent commits.

This change adds the option of specifying two git commits to git-clang-format when using the `--diff` flag, which uses a different strategy to diff (using `git-diff-tree` instead of `git-diff-index`), and runs clang-format against the second commit instead of the working directory.

There is a slight backwards-incompatibility introduced with this change: if a filename matches a branch name or other commit-ish, then `git clang-format <commit> <file>` will no longer work as expected; use `git clang-format <commit> -- <file>` instead.

Patch by Luis Hector Chavez!

Reviewers: djasper, lodato

Subscribers: lodato, cfe-commits, srhines

Projects: #clang-c

Differential Revision: https://reviews.llvm.org/D24319

llvm-svn: 282136
This commit is contained in:
Stephen Hines 2016-09-22 05:52:55 +00:00
parent a3174853b7
commit 90ced94b22
1 changed files with 97 additions and 50 deletions

View File

@ -32,12 +32,15 @@ import re
import subprocess
import sys
usage = 'git clang-format [OPTIONS] [<commit>] [--] [<file>...]'
usage = 'git clang-format [OPTIONS] [<commit>] [<commit>] [--] [<file>...]'
desc = '''
Run clang-format on all lines that differ between the working directory
and <commit>, which defaults to HEAD. Changes are only applied to the working
directory.
If zero or one commits are given, run clang-format on all lines that differ
between the working directory and <commit>, which defaults to HEAD. Changes are
only applied to the working directory.
If two commits are given (requires --diff), run clang-format on all lines in the
second <commit> that differ from the first <commit>.
The following git-config settings set the default of the corresponding option:
clangFormat.binary
@ -121,8 +124,14 @@ def main():
opts.verbose -= opts.quiet
del opts.quiet
commit, files = interpret_args(opts.args, dash_dash, opts.commit)
changed_lines = compute_diff_and_extract_lines(commit, files)
commits, files = interpret_args(opts.args, dash_dash, opts.commit)
if len(commits) > 1:
if not opts.diff:
die('--diff is required when two commits are given')
else:
if len(commits) > 2:
die('at most two commits allowed; %d given' % len(commits))
changed_lines = compute_diff_and_extract_lines(commits, files)
if opts.verbose >= 1:
ignored_files = set(changed_lines)
filter_by_extension(changed_lines, opts.extensions.lower().split(','))
@ -142,10 +151,17 @@ def main():
# The computed diff outputs absolute paths, so we must cd before accessing
# those files.
cd_to_toplevel()
old_tree = create_tree_from_workdir(changed_lines)
new_tree = run_clang_format_and_save_to_tree(changed_lines,
binary=opts.binary,
style=opts.style)
if len(commits) > 1:
old_tree = commits[1]
new_tree = run_clang_format_and_save_to_tree(changed_lines,
revision=commits[1],
binary=opts.binary,
style=opts.style)
else:
old_tree = create_tree_from_workdir(changed_lines)
new_tree = run_clang_format_and_save_to_tree(changed_lines,
binary=opts.binary,
style=opts.style)
if opts.verbose >= 1:
print 'old tree:', old_tree
print 'new tree:', new_tree
@ -182,40 +198,41 @@ def load_git_config(non_string_options=None):
def interpret_args(args, dash_dash, default_commit):
"""Interpret `args` as "[commit] [--] [files...]" and return (commit, files).
"""Interpret `args` as "[commits] [--] [files]" and return (commits, files).
It is assumed that "--" and everything that follows has been removed from
args and placed in `dash_dash`.
If "--" is present (i.e., `dash_dash` is non-empty), the argument to its
left (if present) is taken as commit. Otherwise, the first argument is
checked if it is a commit or a file. If commit is not given,
`default_commit` is used."""
If "--" is present (i.e., `dash_dash` is non-empty), the arguments to its
left (if present) are taken as commits. Otherwise, the arguments are checked
from left to right if they are commits or files. If commits are not given,
a list with `default_commit` is used."""
if dash_dash:
if len(args) == 0:
commit = default_commit
elif len(args) > 1:
die('at most one commit allowed; %d given' % len(args))
commits = [default_commit]
else:
commit = args[0]
object_type = get_object_type(commit)
if object_type not in ('commit', 'tag'):
if object_type is None:
die("'%s' is not a commit" % commit)
else:
die("'%s' is a %s, but a commit was expected" % (commit, object_type))
commits = args
for commit in commits:
object_type = get_object_type(commit)
if object_type not in ('commit', 'tag'):
if object_type is None:
die("'%s' is not a commit" % commit)
else:
die("'%s' is a %s, but a commit was expected" % (commit, object_type))
files = dash_dash[1:]
elif args:
if disambiguate_revision(args[0]):
commit = args[0]
files = args[1:]
else:
commit = default_commit
files = args
commits = []
while args:
if not disambiguate_revision(args[0]):
break
commits.append(args.pop(0))
if not commits:
commits = [default_commit]
files = args
else:
commit = default_commit
commits = [default_commit]
files = []
return commit, files
return commits, files
def disambiguate_revision(value):
@ -243,9 +260,9 @@ def get_object_type(value):
return stdout.strip()
def compute_diff_and_extract_lines(commit, files):
def compute_diff_and_extract_lines(commits, files):
"""Calls compute_diff() followed by extract_lines()."""
diff_process = compute_diff(commit, files)
diff_process = compute_diff(commits, files)
changed_lines = extract_lines(diff_process.stdout)
diff_process.stdout.close()
diff_process.wait()
@ -255,13 +272,17 @@ def compute_diff_and_extract_lines(commit, files):
return changed_lines
def compute_diff(commit, files):
"""Return a subprocess object producing the diff from `commit`.
def compute_diff(commits, files):
"""Return a subprocess object producing the diff from `commits`.
The return value's `stdin` file object will produce a patch with the
differences between the working directory and `commit`, filtered on `files`
(if non-empty). Zero context lines are used in the patch."""
cmd = ['git', 'diff-index', '-p', '-U0', commit, '--']
differences between the working directory and the first commit if a single
one was specified, or the difference between both specified commits, filtered
on `files` (if non-empty). Zero context lines are used in the patch."""
git_tool = 'diff-index'
if len(commits) > 1:
git_tool = 'diff-tree'
cmd = ['git', git_tool, '-p', '-U0'] + commits + ['--']
cmd.extend(files)
p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
p.stdin.close()
@ -318,15 +339,17 @@ def create_tree_from_workdir(filenames):
return create_tree(filenames, '--stdin')
def run_clang_format_and_save_to_tree(changed_lines, binary='clang-format',
style=None):
def run_clang_format_and_save_to_tree(changed_lines, revision=None,
binary='clang-format', style=None):
"""Run clang-format on each file and save the result to a git tree.
Returns the object ID (SHA-1) of the created tree."""
def index_info_generator():
for filename, line_ranges in changed_lines.iteritems():
mode = oct(os.stat(filename).st_mode)
blob_id = clang_format_to_blob(filename, line_ranges, binary=binary,
blob_id = clang_format_to_blob(filename, line_ranges,
revision=revision,
binary=binary,
style=style)
yield '%s %s\t%s' % (mode, blob_id, filename)
return create_tree(index_info_generator(), '--index-info')
@ -352,26 +375,42 @@ def create_tree(input_lines, mode):
return tree_id
def clang_format_to_blob(filename, line_ranges, binary='clang-format',
style=None):
def clang_format_to_blob(filename, line_ranges, revision=None,
binary='clang-format', style=None):
"""Run clang-format on the given file and save the result to a git blob.
Runs on the file in `revision` if not None, or on the file in the working
directory if `revision` is None.
Returns the object ID (SHA-1) of the created blob."""
clang_format_cmd = [binary, filename]
clang_format_cmd = [binary]
if style:
clang_format_cmd.extend(['-style='+style])
clang_format_cmd.extend([
'-lines=%s:%s' % (start_line, start_line+line_count-1)
for start_line, line_count in line_ranges])
if revision:
clang_format_cmd.extend(['-assume-filename='+filename])
git_show_cmd = ['git', 'cat-file', 'blob', '%s:%s' % (revision, filename)]
git_show = subprocess.Popen(git_show_cmd, stdin=subprocess.PIPE,
stdout=subprocess.PIPE)
git_show.stdin.close()
clang_format_stdin = git_show.stdout
else:
clang_format_cmd.extend([filename])
git_show = None
clang_format_stdin = subprocess.PIPE
try:
clang_format = subprocess.Popen(clang_format_cmd, stdin=subprocess.PIPE,
clang_format = subprocess.Popen(clang_format_cmd, stdin=clang_format_stdin,
stdout=subprocess.PIPE)
if clang_format_stdin == subprocess.PIPE:
clang_format_stdin = clang_format.stdin
except OSError as e:
if e.errno == errno.ENOENT:
die('cannot find executable "%s"' % binary)
else:
raise
clang_format.stdin.close()
clang_format_stdin.close()
hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, '--stdin']
hash_object = subprocess.Popen(hash_object_cmd, stdin=clang_format.stdout,
stdout=subprocess.PIPE)
@ -381,6 +420,8 @@ def clang_format_to_blob(filename, line_ranges, binary='clang-format',
die('`%s` failed' % ' '.join(hash_object_cmd))
if clang_format.wait() != 0:
die('`%s` failed' % ' '.join(clang_format_cmd))
if git_show and git_show.wait() != 0:
die('`%s` failed' % ' '.join(git_show_cmd))
return stdout.rstrip('\r\n')
@ -419,7 +460,12 @@ def print_diff(old_tree, new_tree):
# We use the porcelain 'diff' and not plumbing 'diff-tree' because the output
# is expected to be viewed by the user, and only the former does nice things
# like color and pagination.
subprocess.check_call(['git', 'diff', old_tree, new_tree, '--'])
#
# We also only print modified files since `new_tree` only contains the files
# that were modified, so unmodified files would show as deleted without the
# filter.
subprocess.check_call(['git', 'diff', '--diff-filter=M', old_tree, new_tree,
'--'])
def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
@ -427,7 +473,8 @@ def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
Bails if there are local changes in those files and not `force`. If
`patch_mode`, runs `git checkout --patch` to select hunks interactively."""
changed_files = run('git', 'diff-tree', '-r', '-z', '--name-only', old_tree,
changed_files = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
'--name-only', old_tree,
new_tree).rstrip('\0').split('\0')
if not force:
unstaged_files = run('git', 'diff-files', '--name-status', *changed_files)