diff --git a/git_common.py b/git_common.py
index d571f820b7..949ba4695e 100644
--- a/git_common.py
+++ b/git_common.py
@@ -374,6 +374,10 @@ def del_config(option, scope='local'):
pass
+def diff(oldrev, newrev, *args):
+  """Invokes `git diff` on two revisions via run() and returns its result.
+
+  Args:
+    oldrev: Revision passed as the first operand of `diff`.
+    newrev: Revision passed as the second operand of `diff`.
+    *args: Extra command-line arguments appended after the two revisions.
+
+  Returns:
+    Whatever run() returns for the assembled `diff` command.
+  """
+  diff_argv = ('diff', oldrev, newrev) + args
+  return run(*diff_argv)
+
+
def freeze():
took_action = False
diff --git a/git_hyper_blame.py b/git_hyper_blame.py
index 17424511ab..5a7daa0cf5 100755
--- a/git_hyper_blame.py
+++ b/git_hyper_blame.py
@@ -149,6 +149,110 @@ def get_parsed_blame(filename, revision='HEAD'):
return list(parse_blame(blame))
+# Cache of `git diff` results, keyed by the (oldrev, newrev) pair that was
+# diffed. Only the hunk line numbers are kept, not the actual diff contents.
+#
+# Each value is a "hunk list": a list of (old, new) pairs, one per hunk, where
+# old and new are themselves (start, length) pairs of line numbers.
+#
+# A hunk list can also be None (if the diff failed).
+# NOTE(review): cache_diff_hunks, as visible here, only ever stores lists --
+# confirm whether any code path still records None.
+diff_hunks_cache = {}
+
+
+def cache_diff_hunks(oldrev, newrev):
+  """Returns the hunk ranges of the diff from |oldrev| to |newrev|, memoized.
+
+  The result is looked up in (and, on a miss, stored into) diff_hunks_cache
+  under the key (oldrev, newrev), so the diff is computed at most once per
+  pair of revisions.
+
+  Args:
+    oldrev: Revision on the old side of the diff.
+    newrev: Revision on the new side of the diff.
+
+  Returns:
+    A list with one entry per '@@' hunk header line. Each entry is a tuple of
+    (start, length) integer pairs parsed from the second and third
+    space-separated fields of that header (the old and new ranges).
+  """
+  def parse_start_length(s):
+    # Drop the leading '-' or '+' sign, then separate start from length.
+    unsigned = s[1:]
+    fields = unsigned.split(',')
+    if len(fields) == 2:
+      start, length = fields
+    else:
+      # The length is optional in a hunk header (defaults to 1).
+      start, length = unsigned, 1
+    return int(start), int(length)
+
+  cache_key = (oldrev, newrev)
+  # Membership test rather than .get(): a cached value may itself be falsy.
+  if cache_key in diff_hunks_cache:
+    return diff_hunks_cache[cache_key]
+
+  # -U0 drops all context lines, which yields the smallest possible hunks.
+  diff_output = git_common.diff(oldrev, newrev, '-U0')
+
+  # Keep only the hunk headers; fields 1 and 2 of each are the two ranges.
+  hunks = [
+      tuple(parse_start_length(r) for r in header.split(' ', 3)[1:3])
+      for header in diff_output.split('\n')
+      if header.startswith('@@')
+  ]
+
+  diff_hunks_cache[cache_key] = hunks
+  return hunks
+
+
+def approx_lineno_across_revs(filename, newfilename, revision, newrevision,
+ lineno):
+ """Computes the approximate movement of a line number between two revisions.
+
+ Consider line |lineno| in |filename| at |revision|. This function computes the
+ line number of that line in |newfilename| at |newrevision|. This is
+ necessarily approximate.
+
+ Args:
+ filename: The file (within the repo) at |revision|.
+ newfilename: The name of the same file at |newrevision|.
+ revision: A git revision.
+ newrevision: Another git revision. Note: Can be ahead or behind |revision|.
+ lineno: Line number within |filename| at |revision|.
+
+ Returns:
+ Line number within |newfilename| at |newrevision|.
+ """
+ # This doesn't work that well if there are a lot of line changes within the
+ # hunk (demonstrated by GitHyperBlameLineMotionTest.testIntraHunkLineMotion).
+ # A fuzzy heuristic that takes the text of the new line and tries to find a
+ # deleted line within the hunk that mostly matches the new line could help.
+
+ # Use the
+When a line skips over an ignored commit, a guess is made as to which commit
+previously modified that line, but it is not always clear where the line came
+from. If the ignored commit makes lots of changes in close proximity, in
+particular adding/removing/reordering lines, then the wrong authors may be
+blamed for nearby edits.
+For this reason, hyper-blame works best when the ignored commits are
+limited to minor changes such as formatting and renaming, not refactoring or
+other more invasive changes.
-When a commit is ignored, hyper-blame currently just blames the same line in
-the previous version of the file. This can be wildly inaccurate if the ignored
-commit adds or removes lines, resulting in a completely wrong commit being
-blamed.
-
-There is currently no way to pass the ignore list as a file.