Patchwork [3,of,4,hyperblame] annotate: add core algorithm to skip a rev

login
register
mail settings
Submitter Siddharth Agarwal
Date May 25, 2017, 2:39 a.m.
Message ID <6eb72de440f311e7aa5f0002c991e86a-964d56b0@7511894063d3764ff01ea8111f5a004d7dd700ed078797c204a24e620ddb965c>
Download mbox | patch
Permalink /patch/20893/
State Accepted
Headers show

Comments

Siddharth Agarwal - May 25, 2017, 2:39 a.m.
# HG changeset patch
# User Siddharth Agarwal <sid0@fb.com>
# Date 1495678034 25200
#      Wed May 24 19:07:14 2017 -0700
# Node ID 134dcc1222e4b992b4a60c44c9b9ed8d10422632
# Parent  0f28e90b0fb95733de10f8d1524c345fe5949315
annotate: add core algorithm to skip a rev

The core algorithm is inspired by git hyper-blame, implemented at
https://chromium.googlesource.com/chromium/tools/depot_tools.git/+/master/git_hyper_blame.py.

The heuristic is as documented in the comments.
Yuya Nishihara - May 28, 2017, 11:47 a.m.
On Wed, 24 May 2017 19:39:49 -0700, Siddharth Agarwal wrote:
> # HG changeset patch
> # User Siddharth Agarwal <sid0@fb.com>
> # Date 1495678034 25200
> #      Wed May 24 19:07:14 2017 -0700
> # Node ID 134dcc1222e4b992b4a60c44c9b9ed8d10422632
> # Parent  0f28e90b0fb95733de10f8d1524c345fe5949315
> annotate: add core algorithm to skip a rev

Looks great. Queued the series, thanks.

For UI thing, I like Greg's revset idea, but the core algorithm will be
useful anyway.

> +    >>> def decorate(text, rev):
> +    ...     return ([(rev, i) for i in xrange(1, text.count('\n') + 1)], text)

It isn't introduced by this patch, but I was tricked by this. rev isn't
a revision number but a filectx!

>      for parent, blocks in pblocks:
>          for (a1, a2, b1, b2), t in blocks:
> -            # Changed blocks ('!') or blocks made only of blank lines ('~')
> -            # belong to the child.
> +            # Changed blocks ('!') or blocks made only of blank lines
> +            # ('~') belong to the child.

Dropped this hunk in flight.

Patch

diff --git a/mercurial/context.py b/mercurial/context.py
--- a/mercurial/context.py
+++ b/mercurial/context.py
@@ -1044,7 +1044,8 @@  class basefilectx(object):
             if ready:
                 visit.pop()
                 curr = decorate(f.data(), f)
-                curr = _annotatepair([hist[p] for p in pl], curr, diffopts)
+                curr = _annotatepair([hist[p] for p in pl], f, curr, False,
+                                     diffopts)
                 for p in pl:
                     if needed[p] == 1:
                         del hist[p]
@@ -1073,17 +1074,114 @@  class basefilectx(object):
             c = visit.pop(max(visit))
             yield c
 
-def _annotatepair(parents, child, diffopts):
+def _annotatepair(parents, childfctx, child, skipchild, diffopts):
+    r'''
+    Given parent and child fctxes and annotate data for parents, for all lines
+    in either parent that match the child, annotate the child with the parent's
+    data.
+
+    Additionally, if `skipchild` is True, replace all other lines with parent
+    annotate data as well such that child is never blamed for any lines.
+
+    >>> oldfctx = 'old'
+    >>> p1fctx, p2fctx, childfctx = 'p1', 'p2', 'c'
+    >>> olddata = 'a\nb\n'
+    >>> p1data = 'a\nb\nc\n'
+    >>> p2data = 'a\nc\nd\n'
+    >>> childdata = 'a\nb2\nc\nc2\nd\n'
+    >>> diffopts = mdiff.diffopts()
+
+    >>> def decorate(text, rev):
+    ...     return ([(rev, i) for i in xrange(1, text.count('\n') + 1)], text)
+
+    Basic usage:
+
+    >>> oldann = decorate(olddata, oldfctx)
+    >>> p1ann = decorate(p1data, p1fctx)
+    >>> p1ann = _annotatepair([oldann], p1fctx, p1ann, False, diffopts)
+    >>> p1ann[0]
+    [('old', 1), ('old', 2), ('p1', 3)]
+    >>> p2ann = decorate(p2data, p2fctx)
+    >>> p2ann = _annotatepair([oldann], p2fctx, p2ann, False, diffopts)
+    >>> p2ann[0]
+    [('old', 1), ('p2', 2), ('p2', 3)]
+
+    Test with multiple parents (note the difference caused by ordering):
+
+    >>> childann = decorate(childdata, childfctx)
+    >>> childann = _annotatepair([p1ann, p2ann], childfctx, childann, False,
+    ...                          diffopts)
+    >>> childann[0]
+    [('old', 1), ('c', 2), ('p2', 2), ('c', 4), ('p2', 3)]
+
+    >>> childann = decorate(childdata, childfctx)
+    >>> childann = _annotatepair([p2ann, p1ann], childfctx, childann, False,
+    ...                          diffopts)
+    >>> childann[0]
+    [('old', 1), ('c', 2), ('p1', 3), ('c', 4), ('p2', 3)]
+
+    Test with skipchild (note the difference caused by ordering):
+
+    >>> childann = decorate(childdata, childfctx)
+    >>> childann = _annotatepair([p1ann, p2ann], childfctx, childann, True,
+    ...                          diffopts)
+    >>> childann[0]
+    [('old', 1), ('old', 2), ('p2', 2), ('p2', 2), ('p2', 3)]
+
+    >>> childann = decorate(childdata, childfctx)
+    >>> childann = _annotatepair([p2ann, p1ann], childfctx, childann, True,
+    ...                          diffopts)
+    >>> childann[0]
+    [('old', 1), ('old', 2), ('p1', 3), ('p1', 3), ('p2', 3)]
+    '''
     pblocks = [(parent, mdiff.allblocks(parent[1], child[1], opts=diffopts))
                for parent in parents]
+
+    if skipchild:
+        # Need to iterate over the blocks twice -- make it a list
+        pblocks = [(p, list(blocks)) for (p, blocks) in pblocks]
     # Mercurial currently prefers p2 over p1 for annotate.
     # TODO: change this?
     for parent, blocks in pblocks:
         for (a1, a2, b1, b2), t in blocks:
-            # Changed blocks ('!') or blocks made only of blank lines ('~')
-            # belong to the child.
+            # Changed blocks ('!') or blocks made only of blank lines
+            # ('~') belong to the child.
             if t == '=':
                 child[0][b1:b2] = parent[0][a1:a2]
+
+    if skipchild:
+        # Now try and match up anything that couldn't be matched,
+        # Reversing pblocks maintains bias towards p2, matching above
+        # behavior.
+        pblocks.reverse()
+
+        # The heuristics are:
+        # * Work on blocks of changed lines (effectively diff hunks with -U0).
+        # This could potentially be smarter but works well enough.
+        # * For a non-matching section, do a best-effort fit. Match lines in
+        #   diff hunks 1:1, dropping lines as necessary.
+        # * Repeat the last line as a last resort.
+
+        # First, replace as much as possible without repeating the last line.
+        remaining = [(parent, []) for parent, _blocks in pblocks]
+        for idx, (parent, blocks) in enumerate(pblocks):
+            for (a1, a2, b1, b2), _t in blocks:
+                if a2 - a1 >= b2 - b1:
+                    for bk in xrange(b1, b2):
+                        if child[0][bk][0] == childfctx:
+                            ak = min(a1 + (bk - b1), a2 - 1)
+                            child[0][bk] = parent[0][ak]
+                else:
+                    remaining[idx][1].append((a1, a2, b1, b2))
+
+        # Then, look at anything left, which might involve repeating the last
+        # line.
+        for parent, blocks in remaining:
+            for a1, a2, b1, b2 in blocks:
+                for bk in xrange(b1, b2):
+                    if child[0][bk][0] == childfctx:
+                        ak = min(a1 + (bk - b1), a2 - 1)
+                        child[0][bk] = parent[0][ak]
     return child
 
 class filectx(basefilectx):
diff --git a/tests/test-doctest.py b/tests/test-doctest.py
--- a/tests/test-doctest.py
+++ b/tests/test-doctest.py
@@ -25,6 +25,7 @@  testmod('mercurial.changegroup')
 testmod('mercurial.changelog')
 testmod('mercurial.color')
 testmod('mercurial.config')
+testmod('mercurial.context')
 testmod('mercurial.dagparser', optionflags=doctest.NORMALIZE_WHITESPACE)
 testmod('mercurial.dispatch')
 testmod('mercurial.encoding')