Patchwork [07,of,16] ancestor.missingancestors: turn into a state-keeping class

login
register
mail settings
Submitter Siddharth Agarwal
Date Nov. 16, 2014, 9:17 a.m.
Message ID <19925690cbad7e74263b.1416129429@devbig136.prn2.facebook.com>
Download mbox | patch
Permalink /patch/6760/
State Accepted
Commit 59e6e5dd36050712c223989443bc1a3af623c804
Headers show

Comments

Siddharth Agarwal - Nov. 16, 2014, 9:17 a.m.
# HG changeset patch
# User Siddharth Agarwal <sid0@fb.com>
# Date 1416037478 28800
#      Fri Nov 14 23:44:38 2014 -0800
# Node ID 19925690cbad7e74263bca6c6d84a53842e3fa59
# Parent  4fb2a3d7dca6732a297151cb0ef12647542b22b2
ancestor.missingancestors: turn into a state-keeping class

This allows multiple efficient missing ancestor queries against the same set of
bases. In upcoming patches we'll also define ways to grow the set of bases.

The fact that the test output hasn't changed establishes this patch's
correctness.

Patch

diff --git a/mercurial/ancestor.py b/mercurial/ancestor.py
--- a/mercurial/ancestor.py
+++ b/mercurial/ancestor.py
@@ -134,83 +134,95 @@ 
         return gca
     return deepest(gca)
 
+class incrementalmissingancestors(object):
+    '''persistent state used to calculate missing ancestors incrementally
+
+    Although similar in spirit to lazyancestors below, this is a separate class
+    because trying to support contains and missingancestors operations with the
+    same internal data structures adds needless complexity.'''
+    def __init__(self, pfunc, bases):
+        self.bases = set(bases)
+        if not self.bases:
+            self.bases.add(nullrev)
+        self.pfunc = pfunc
+
+    def missingancestors(self, revs):
+        '''return all the ancestors of revs that are not ancestors of self.bases
+
+        This may include elements from revs.
+
+        Equivalent to the revset (::revs - ::self.bases). Revs are returned in
+        revision number order, which is a topological order.'''
+        revsvisit = set(revs)
+        basesvisit = self.bases
+        pfunc = self.pfunc
+        bothvisit = revsvisit.intersection(basesvisit)
+        revsvisit.difference_update(bothvisit)
+        if not revsvisit:
+            return []
+
+        start = max(max(revsvisit), max(basesvisit))
+        # At this point, we hold the invariants that:
+        # - revsvisit is the set of nodes we know are an ancestor of at least
+        #   one of the nodes in revs
+        # - basesvisit is the same for bases
+        # - bothvisit is the set of nodes we know are ancestors of at least one
+        #   of the nodes in revs and one of the nodes in bases. bothvisit and
+        #   revsvisit are mutually exclusive, but bothvisit is a subset of
+        #   basesvisit.
+        # Now we walk down in reverse topo order, adding parents of nodes
+        # already visited to the sets while maintaining the invariants. When a
+        # node is found in both revsvisit and basesvisit, it is removed from
+        # revsvisit and added to bothvisit. When revsvisit becomes empty, there
+        # are no more ancestors of revs that aren't also ancestors of bases, so
+        # exit.
+
+        missing = []
+        for curr in xrange(start, nullrev, -1):
+            if not revsvisit:
+                break
+
+            if curr in bothvisit:
+                bothvisit.remove(curr)
+                # curr's parents might have made it into revsvisit through
+                # another path
+                for p in pfunc(curr):
+                    revsvisit.discard(p)
+                    basesvisit.add(p)
+                    bothvisit.add(p)
+                continue
+
+            if curr in revsvisit:
+                missing.append(curr)
+                revsvisit.remove(curr)
+                thisvisit = revsvisit
+                othervisit = basesvisit
+            elif curr in basesvisit:
+                thisvisit = basesvisit
+                othervisit = revsvisit
+            else:
+                # not an ancestor of revs or bases: ignore
+                continue
+
+            for p in pfunc(curr):
+                if p == nullrev:
+                    pass
+                elif p in othervisit or p in bothvisit:
+                    # p is implicitly in thisvisit. This means p is or should be
+                    # in bothvisit
+                    revsvisit.discard(p)
+                    basesvisit.add(p)
+                    bothvisit.add(p)
+                else:
+                    # visit later
+                    thisvisit.add(p)
+
+        missing.reverse()
+        return missing
+
 def missingancestors(revs, bases, pfunc):
-    """Return all the ancestors of revs that are not ancestors of bases.
-
-    This may include elements from revs.
-
-    Equivalent to the revset (::revs - ::bases). Revs are returned in
-    revision number order, which is a topological order.
-
-    revs and bases should both be iterables. pfunc must return a list of
-    parent revs for a given revs.
-    """
-
-    revsvisit = set(revs)
-    basesvisit = set(bases)
-    if not basesvisit:
-        basesvisit.add(nullrev)
-    bothvisit = revsvisit.intersection(basesvisit)
-    revsvisit.difference_update(bothvisit)
-    if not revsvisit:
-        return []
-    start = max(max(revsvisit), max(basesvisit))
-    # At this point, we hold the invariants that:
-    # - revsvisit is the set of nodes we know are an ancestor of at least one
-    #   of the nodes in revs
-    # - basesvisit is the same for bases
-    # - bothvisit is the set of nodes we know are ancestors of at least one of
-    #   the nodes in revs and one of the nodes in bases, and that are smaller
-    #   than curr. bothvisit and revsvisit are mutually exclusive, but bothvisit
-    #   is a subset of basesvisit.
-    # Now we walk down in reverse topo order, adding parents of nodes already
-    # visited to the sets while maintaining the invariants. When a node is found
-    # in both revsvisit and basesvisit, it is removed from revsvisit and added
-    # to bothvisit. When revsvisit becomes empty, there are no more ancestors of
-    # revs that aren't also ancestors of bases, so exit.
-
-    missing = []
-    for curr in xrange(start, nullrev, -1):
-        if not revsvisit:
-            break
-
-        if curr in bothvisit:
-            bothvisit.remove(curr)
-            # curr's parents might have made it into revsvisit through another
-            # path
-            for p in pfunc(curr):
-                revsvisit.discard(p)
-                basesvisit.add(p)
-                bothvisit.add(p)
-            continue
-
-        if curr in revsvisit:
-            missing.append(curr)
-            revsvisit.remove(curr)
-            thisvisit = revsvisit
-            othervisit = basesvisit
-        elif curr in basesvisit:
-            thisvisit = basesvisit
-            othervisit = revsvisit
-        else:
-            # not an ancestor of revs or bases: ignore
-            continue
-
-        for p in pfunc(curr):
-            if p == nullrev:
-                pass
-            elif p in othervisit or p in bothvisit:
-                # p is implicitly in thisvisit. This means p is or should be
-                # in bothvisit
-                revsvisit.discard(p)
-                basesvisit.add(p)
-                bothvisit.add(p)
-            else:
-                # visit later
-                thisvisit.add(p)
-
-    missing.reverse()
-    return missing
+    inc = incrementalmissingancestors(pfunc, bases)
+    return inc.missingancestors(revs)
 
 class lazyancestors(object):
     def __init__(self, pfunc, revs, stoprev=0, inclusive=False):
diff --git a/tests/test-ancestor.py b/tests/test-ancestor.py
--- a/tests/test-ancestor.py
+++ b/tests/test-ancestor.py
@@ -88,7 +88,8 @@ 
             revs = samplerevs(graphnodes)
 
             # fast algorithm
-            h = ancestor.missingancestors(revs, bases, graph.__getitem__)
+            inc = ancestor.incrementalmissingancestors(graph.__getitem__, bases)
+            h = inc.missingancestors(revs)
             # reference slow algorithm
             r = naivemissingancestors(ancs, revs, bases)
             if h != r: