Tools/scripts/diff-2o.py (81 lines of code) (raw):
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. (http://www.facebook.com)
"""
Produces an approximate second-order diff of the current commit and the linked
diff in Phabricator. This is useful for seeing roughly what you changed since
you last uploaded a commit.
This is often much clearer than doing something like:
git diff HEAD <commit uploaded to Phabricator>
as it only looks at what changed between the two commit diffs. This excludes
changes which are the result of the local commit being rebased, but have
nothing to do with what you actually changed.
The output is only approximate because there's generally not enough information
to fully reconstruct the changes by just looking at two diffs. However, most of
the time it gives a pretty good hint to a human what changed.
"""
import difflib
import re
import shlex
import subprocess
from typing import Dict, List, Union
def _run(cmd: Union[str, List[str]]) -> List[str]:
    """Run 'cmd' and return its stdout as a list of lines.

    'cmd' may be a single command string, which is tokenised with
    shlex.split(), or an already-split argument list, which is used as-is.
    The command is executed without a shell.

    stdout is decoded as UTF-8 and split on "\\n", so output ending in a
    newline yields a trailing empty string. The exit status and stderr are
    not inspected.
    """
    # isinstance() rather than an exact type comparison so that str subclasses
    # are tokenised too instead of being handed to subprocess as a single
    # program name.
    cmd_list = shlex.split(cmd) if isinstance(cmd, str) else cmd
    result = subprocess.run(cmd_list, capture_output=True)
    return result.stdout.decode('utf-8').split("\n")
def _split_diff_to_context_free_file_diffs(
    diff: List[str], src: str
) -> Dict[str, List[str]]:
    """Take a noisy diff-like input covering many files and return a purified
    map of file name -> hunk changes. Strips out all line info, metadata, and
    context along the way. E.g. for input:

        Summary
        diff --git a/dir/fileXYZ b/dir/fileXYZ
        --- a/dir/fileXYZ
        +++ b/dir/fileXYZ
        @@ -1,3 +1,3 @@ inferred function context
         context
        -badline
        +goodline
         context

    Produce:

        {"dir/fileXYZ": [
            "@@",
            "-badline",
            "+goodline",
        ]}

    Args:
        diff: The diff text, already split into lines.
        src: A label for the input, used only in error messages.

    Returns:
        A dict keyed by the path that follows "--- a/". Each value holds, in
        order, a bare "@@" for every hunk header and every line that starts
        with "+" or "-". If the same path appears more than once, the last
        occurrence wins.

    Raises:
        Exception: if a "--- a/<path>" line is the last line of the input, or
            if it is not immediately followed by a line starting with
            "+++ b/<path>".
    """
    res: Dict[str, List[str]] = {}
    # Lines seen before the first file header are collected here and dropped,
    # because this initial list is never stored in 'res'.
    current_diff_file_lines: List[str] = []

    # An explicit iterator lets the '---' branch consume the following '+++'
    # line so that it is not processed again by the loop.
    numbered_lines = iter(enumerate(diff))
    for line_n, line in numbered_lines:
        m = re.match(r'^--- a/(.*)$', line)
        if m:
            filename = m.group(1)
            current_diff_file_lines = []
            res[filename] = current_diff_file_lines

            following = next(numbered_lines, None)
            if following is None:
                # The input ended right after the '---' line.
                raise Exception(f'{src}:{line_n} - missing +++ after ---')
            line_n, line = following
            # Plain prefix comparison: the file name is literal text, not a
            # pattern, so paths containing regex metacharacters (e.g. "c++")
            # must still match.
            # NOTE(review): a header whose old side is not "a/..." (e.g.
            # "--- /dev/null") is not recognised as a file boundary, so its
            # +/- lines land in the previous file's list; and a new side that
            # is not "b/<path>" (e.g. "+++ /dev/null") raises below. Confirm
            # whether added/deleted files need dedicated handling.
            if not line.startswith(f'+++ b/{filename}'):
                raise Exception(f'{src}:{line_n} - invalid +++ line after ---')
        elif line.startswith("@@"):
            # Some tools add inferred context to their @@ lines e.g. the
            # function the hunk appears in and other tools do not. As we
            # don't use line info anyway, simply strip all context.
            current_diff_file_lines.append("@@")
        elif line.startswith(("+", "-")):
            current_diff_file_lines.append(line)
        # Anything else (context lines, metadata, blank lines) is ignored.
    return res
def _do_diff_2o(
    diff_a: List[str], src_a: str, diff_b: List[str], src_b: str
) -> None:
    """Print an approximate second-order diff between two diffs.

    Both inputs are reduced to per-file lists of "@@" markers and +/- lines,
    and those lists are then diffed against each other with zero context.

    Args:
        diff_a: Lines of the first ("old") diff.
        src_a: Label for diff_a, used in parse error messages.
        diff_b: Lines of the second ("new") diff.
        src_b: Label for diff_b, used in parse error messages.

    Output, written to stdout:
        * For each file present in both diffs whose change lines differ, a
          "Changed: <file>" header followed by "| "-prefixed lines.
        * For each file present in only one of the diffs, a single
          "Added/removed: <file>" line.
        Files are reported in sorted order so the output is stable between
        runs.
    """
    file_diffs_a = _split_diff_to_context_free_file_diffs(diff_a, src_a)
    file_diffs_b = _split_diff_to_context_free_file_diffs(diff_b, src_b)

    # Each second-order line carries two markers: the first says which diff
    # the line belongs to ("-" only in diff_a, "+" only in diff_b), the second
    # is the line's own +/- from the first-order diff. Collapse them to one:
    #   "++" an addition that is new in diff_b     -> "+"
    #   "+-" a removal that is new in diff_b       -> "-"
    #   "-+" an addition that diff_b no longer has -> "-"
    #   "--" a removal that diff_b no longer has   -> "+"
    collapsed_sign = {"++": "+", "+-": "-", "-+": "-", "--": "+"}

    # sorted() gives a deterministic report; plain set iteration order varies.
    for f in sorted(set(file_diffs_a) & set(file_diffs_b)):
        diff_lines = difflib.unified_diff(file_diffs_a[f], file_diffs_b[f], n=0)
        # Turn from generator to list and skip first --- and +++ lines
        second_order_lines = list(diff_lines)[2:]

        changelines: List[str] = []
        for line in second_order_lines:
            marker = line[:2]
            if marker in collapsed_sign:
                changelines.append(collapsed_sign[marker] + line[2:])
            elif marker == "@@" or line[1:3] == "@@":
                # Either a hunk header of the second-order diff itself or an
                # added/removed first-order "@@" marker. Both just denote a
                # gap, so emit one separator and merge consecutive ones.
                if not changelines or changelines[-1] != "...\n":
                    changelines.append("...\n")
            else:
                changelines.append(line)

        if changelines:
            print(f"Changed: {f}")
            for line in changelines:
                print(f"| {line.strip()}")

    # Files that appear in exactly one of the two diffs.
    for f in sorted(set(file_diffs_a) ^ set(file_diffs_b)):
        print(f"Added/removed: {f}")
def main() -> None:
    """Print a second-order diff of HEAD against its linked Phabricator diff.

    Runs "git show" for the current commit, looks for a line ending in a URL
    whose last path component is a diff id (D<digits>), exports that diff with
    "jf export --diff", and prints how the local commit's changes differ from
    the exported ones.

    Raises:
        Exception: if no diff id can be found in the "git show" output.
    """
    # The local commit (message and patch) as a list of lines.
    git_full_diff = _run("git show")

    # The first line that ends in a URL pointing at a D<number> identifies
    # the linked diff.
    url_matches = (
        re.match(r'.*https://.*/(D\d+)$', text) for text in git_full_diff
    )
    diff_n = next((found.group(1) for found in url_matches if found), None)
    if not diff_n:
        raise Exception('Could not find Phabricator diff from Git commit')

    # Download the previously uploaded version of the diff.
    phab_full_diff = _run(f"jf export --diff {diff_n}")

    # The uploaded diff is the "old" side and the local commit the "new" side.
    _do_diff_2o(phab_full_diff, "phab_full_diff", git_full_diff, "git")
# Run the tool only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()