Patchwork [1,of,6] rebase: rewrite defineparents

login
register
mail settings
Submitter Jun Wu
Date July 9, 2017, 7:14 p.m.
Message ID <5a13cf09bacd7912367c.1499627688@x1c>
Download mbox | patch
Permalink /patch/22176/
State Superseded
Headers show

Comments

Jun Wu - July 9, 2017, 7:14 p.m.
# HG changeset patch
# User Jun Wu <quark@fb.com>
# Date 1499568916 25200
#      Sat Jul 08 19:55:16 2017 -0700
# Node ID 5a13cf09bacd7912367c4f15b73920fadaf0a457
# Parent  1e872b08a4e93b21e60cba695e4022bf71d4acf4
# Available At https://bitbucket.org/quark-zju/hg-draft
#              hg pull https://bitbucket.org/quark-zju/hg-draft -r 5a13cf09bacd
rebase: rewrite defineparents

The old code has too many tech debts (like outdated comments, confusing
assertions, etc) and is very error-prone to be improved. This patch rewrites
"defineparents" to make future changes easier to reason about.

Patch

diff --git a/hgext/rebase.py b/hgext/rebase.py
--- a/hgext/rebase.py
+++ b/hgext/rebase.py
@@ -67,5 +67,4 @@  revprecursor = -4
 # plain prune (no successor)
 revpruned = -5
-revskipped = (revignored, revprecursor, revpruned)
 
 cmdtable = {}
@@ -391,8 +390,5 @@  class rebaseruntime(object):
                 ui.progress(_("rebasing"), pos, ("%d:%s" % (rev, ctx)),
                             _('changesets'), total)
-                p1, p2, base = defineparents(repo, rev, self.dest,
-                                             self.state,
-                                             self.destancestors,
-                                             self.obsoletenotrebased)
+                p1, p2, base = defineparents(repo, rev, self.dest, self.state)
                 self.storestatus()
                 storecollapsemsg(repo, self.collapsemsg)
@@ -464,7 +460,5 @@  class rebaseruntime(object):
         if self.collapsef and not self.keepopen:
             p1, p2, _base = defineparents(repo, min(self.state),
-                                          self.dest, self.state,
-                                          self.destancestors,
-                                          self.obsoletenotrebased)
+                                          self.dest, self.state)
             editopt = opts.get('edit')
             editform = 'rebase.collapse'
@@ -884,13 +878,4 @@  def rebasenode(repo, rev, p1, base, stat
     return stats
 
-def nearestrebased(repo, rev, state):
-    """return the nearest ancestors of rev in the rebase result"""
-    rebased = [r for r in state if state[r] > nullmerge]
-    candidates = repo.revs('max(%ld  and (::%d))', rebased, rev)
-    if candidates:
-        return state[candidates.first()]
-    else:
-        return None
-
 def _checkobsrebase(repo, ui, rebaseobsrevs, rebasesetrevs, rebaseobsskipped):
     """
@@ -916,105 +901,159 @@  def _checkobsrebase(repo, ui, rebaseobsr
         raise error.Abort(msg % (",".join(divhashes),), hint=h)
 
-def defineparents(repo, rev, dest, state, destancestors,
-                  obsoletenotrebased):
-    'Return the new parent relationship of the revision that will be rebased'
-    parents = repo[rev].parents()
-    p1 = p2 = nullrev
-    rp1 = None
+def adjusteddest(repo, rev, prev, dest, state):
+    """adjust rebase destination given the current rebase state
+
+    rev is being rebased, prev is one of its parent.
+
+    For example, when rebasing E to F:
 
-    p1n = parents[0].rev()
-    if p1n in destancestors:
-        p1 = dest
-    elif p1n in state:
-        if state[p1n] == nullmerge:
-            p1 = dest
-        elif state[p1n] in revskipped:
-            p1 = nearestrebased(repo, p1n, state)
-            if p1 is None:
-                p1 = dest
-        else:
-            p1 = state[p1n]
-    else: # p1n external
-        p1 = dest
-        p2 = p1n
+        B' <- written during the rebase
+        |
+        F <- original destination of B, E
+        |
+        | E <- rev, which is being rebased
+        | |
+        | D <- prev, one parent of rev being checked
+        | |
+        | x <- skipped
+        | |
+        | C <- rebased as C'       C' <- written during the rebase
+        | |                        |
+        | B <- rebased as B'       G <- destination of C, separate subgraph
+        |/
+        A
+
+    The destination will be adjusted from F to B'.
+
+    Besides, adjust dest according to existing rebase information. For example,
+
+      B C D    B wants to be rebased on top of C, C wants to be rebased on top
+       \|/     of D.
+        A
 
-    if len(parents) == 2 and parents[1].rev() not in destancestors:
-        p2n = parents[1].rev()
-        # interesting second parent
-        if p2n in state:
-            if p1 == dest: # p1n in destancestors or external
-                p1 = state[p2n]
-                if p1 == revprecursor:
-                    rp1 = obsoletenotrebased[p2n]
-            elif state[p2n] in revskipped:
-                p2 = nearestrebased(repo, p2n, state)
-                if p2 is None:
-                    # no ancestors rebased yet, detach
-                    p2 = dest
-            else:
-                p2 = state[p2n]
-        else: # p2n external
-            if p2 != nullrev: # p1n external too => rev is a merged revision
-                raise error.Abort(_('cannot use revision %d as base, result '
-                        'would have 3 parents') % rev)
-            p2 = p2n
-    repo.ui.debug(" future parents are %d and %d\n" %
-                            (repo[rp1 or p1].rev(), repo[p2].rev()))
+          C'   After rebasing C, when considering B's dest, replace it with C'.
+          |
+      B   D
+       \ /
+        A
+    """
+    if prev != nullrev:
+        # interesting rebase source - having a same dest and is rebased
+        source = [s for s, d in state.items() if d > 0]
+        candidate = repo.revs('max(%ld and (::%d))', source, prev).first()
+        if candidate is not None:
+            return state[candidate]
+    if dest in state:
+        revstate = state[dest]
+        if revstate == revtodo:
+            raise error.ProgrammingError(
+                'rev %d should not have todo state at this time' % dest)
+        elif revstate > revtodo:
+            return revstate
+    return dest
+
+def successorrevs(repo, rev):
+    """yield revision numbers for successors of rev"""
+    unfi = repo.unfiltered()
+    nodemap = unfi.changelog.nodemap
+    for s in obsutil.allsuccessors(unfi.obsstore, [unfi[rev].node()]):
+        if s in nodemap:
+            yield nodemap[s]
+
+def defineparents(repo, rev, dest, state):
+    'Return the new parent relationship of the revision that will be rebased'
+    cl = repo.changelog
+    def ancestor(rev1, rev2):
+        # take revision numbers instead of nodes
+        return cl.rev(cl.ancestor(cl.node(rev1), cl.node(rev2)))
+
+    oldps = repo.changelog.parentrevs(rev) # old parents
+    newps = list(oldps) # new parents
+    bases = set() # merge base candidates
+    dests = [adjusteddest(repo, rev, p, dest, state) for p in oldps]
+
+    for i, p in enumerate(oldps):
+        # Do not skip nullrev for p1. Otherwise root node cannot be rebased.
+        if p == nullrev and i != 0:
+            continue
 
-    if not any(p.rev() in state for p in parents):
-        # Case (1) root changeset of a non-detaching rebase set.
-        # Let the merge mechanism find the base itself.
-        base = None
-    elif not repo[rev].p2():
-        # Case (2) detaching the node with a single parent, use this parent
-        base = repo[rev].p1().rev()
-    else:
-        # Assuming there is a p1, this is the case where there also is a p2.
-        # We are thus rebasing a merge and need to pick the right merge base.
+        # For non-merge changeset, just move p to adjusted dest as requested.
+        if oldps[1] == nullrev:
+            newps[i] = dests[i]
+
+        # For merge changeset, if we move p to dests[i] unconditionally, both
+        # parents may change and the end result looks like "the merge loses a
+        # parent", which is a surprise. This is a limit because "--dest" only
+        # accepts one dest per src.
         #
-        # Imagine we have:
-        # - M: current rebase revision in this step
-        # - A: one parent of M
-        # - B: other parent of M
-        # - D: destination of this merge step (p1 var)
-        #
-        # Consider the case where D is a descendant of A or B and the other is
-        # 'outside'. In this case, the right merge base is the D ancestor.
-        #
-        # An informal proof, assuming A is 'outside' and B is the D ancestor:
-        #
-        # If we pick B as the base, the merge involves:
-        # - changes from B to M (actual changeset payload)
-        # - changes from B to D (induced by rebase) as D is a rebased
-        #   version of B)
-        # Which exactly represent the rebase operation.
+        # Therefore, only move p with reasonable conditions (in this order):
+        #   - use dest, if dest is a descendent of (p or one of p's successors)
+        #   - use p's rebased result, if p is rebased (state[p] > 0)
         #
-        # If we pick A as the base, the merge involves:
-        # - changes from A to M (actual changeset payload)
-        # - changes from A to D (with include changes between unrelated A and B
-        #   plus changes induced by rebase)
-        # Which does not represent anything sensible and creates a lot of
-        # conflicts. A is thus not the right choice - B is.
+        # That also means, we might ignore user's dest for a merge sometimes.
+        elif any(ancestor(x, dests[i]) == x for x in successorrevs(repo, p)):
+            newps[i] = dests[i]
+        elif p in state and state[p] > 0:
+            newps[i] = state[p]
+
+        # The old parent is a merge base candidate
+        if newps[i] != oldps[i]:
+            bases.add(p)
+
+    # Remove duplicated parent
+    if newps[1] == newps[0]:
+        newps[1] = nullrev
+
+    # Extra checks for merge changesets
+    if newps[1] != nullrev:
+        # If one parent becomes an ancestor of the other, drop the ancestor
+        anc = ancestor(*newps)
+        newps = [p for p in newps if p != anc]
+        assert newps
+        if len(newps) == 1:
+            newps.append(nullrev)
+        elif oldps[0] == newps[0]:
+            # Since we update to p1, swap parents if only p2 gets changed
+            newps.reverse()
+
+    # No parent change might be an error because we fail to make rev a
+    # descendent of requested dest. This can happen, for example:
+    #
+    #   C          rebase -s C -d D
+    #   |\         None of A and B will be changed to D and rebase fails.
+    #   A B   D
+    if set(newps) == set(oldps) and dest not in newps:
+        # The error message is for compatibility. It's a bit misleading since
+        # rebase is not supposed to add new parents.
+        raise error.Abort(_('cannot use revision %d as base, '
+                            'result would have 3 parents') % rev)
+
+    # Source should not be ancestor of dest. The check here guarantees it's
+    # impossible. Since the check before running actual rebase does not cover
+    # complex cases.
+    if any(p != nullrev and ancestor(p, rev) == rev for p in newps):
+        raise error.Abort(_('source is ancestor of destination'))
+
+    repo.ui.debug(" future parents are %d and %d\n" % tuple(newps))
+
+    # Merge base
+    base = None
+    if len(bases) == 1:
+        base = next(iter(bases))
+    else:
+        # Prefer merge base candidates in (ALLSRC::). Because they are usually
+        # not calculable from changelog DAG alone. For example,
         #
-        # Note: The base found in this 'proof' is only correct in the specified
-        # case. This base does not make sense if is not D a descendant of A or B
-        # or if the other is not parent 'outside' (especially not if the other
-        # parent has been rebased). The current implementation does not
-        # make it feasible to consider different cases separately. In these
-        # other cases we currently just leave it to the user to correctly
-        # resolve an impossible merge using a wrong ancestor.
-        #
-        # xx, p1 could be -4, and both parents could probably be -4...
-        for p in repo[rev].parents():
-            if state.get(p.rev()) == p1:
-                base = p.rev()
-                break
-        else: # fallback when base not found
-            base = None
+        #   B'     rebase -s B -d D, when B was rebased to B'. dest for C
+        #   | C    is B', but merge base for C is B, instead of
+        #   D |    changelog.ancestor(C, B') == A. If changelog DAG and "state"
+        #   | B    edges are merged (so there will be an edge from B to B'),
+        #   |/     the merge base is still ancestor(C, B') in the merged graph.
+        #   A
+        subsetbases = bases & set(state)
+        if len(subsetbases) == 1:
+            base = sorted(subsetbases)[0]
 
-            # Raise because this function is called wrong (see issue 4106)
-            raise AssertionError('no base found to rebase on '
-                                 '(defineparents called wrong)')
-    return rp1 or p1, p2, base
+    return newps[0], newps[1], base
 
 def isagitpatch(repo, patchname):