# Patchwork [04,of,16] test-ancestor: use random testing for missing ancestors

Submitter Siddharth Agarwal Nov. 16, 2014, 9:17 a.m. <60263a5502a8dd6a23e7.1416129426@devbig136.prn2.facebook.com> mbox | patch /patch/6750/ Accepted 3b1b8f25443e2f5fc44a3db75334d119f193c715 show

Siddharth Agarwal - Nov. 16, 2014, 9:17 a.m.
```# HG changeset patch
# User Siddharth Agarwal <sid0@fb.com>
# Date 1416077734 28800
#      Sat Nov 15 10:55:34 2014 -0800
# Parent  a9555154aa62ffee7fe328e7f6f2f7585863724b
test-ancestor: use random testing for missing ancestors

We're going to make changes to the missing ancestor algorithm, and random
testing will give us much more confidence than a fixed set of tests.
```

## Patch

```diff --git a/tests/test-ancestor.py b/tests/test-ancestor.py
--- a/tests/test-ancestor.py
+++ b/tests/test-ancestor.py
@@ -1,4 +1,98 @@
from mercurial import ancestor, commands, hg, ui, util
+from mercurial.node import nullrev
+import binascii, getopt, math, os, random, sys, time
+
+def buildgraph(rng, nodes=100, rootprob=0.05, mergeprob=0.2, prevprob=0.7):
+    '''nodes: total number of nodes in the graph
+    rootprob: probability that a new node (not 0) will be a root
+    mergeprob: probability that, excluding a root a node will be a merge
+    prevprob: probability that p1 will be the previous node
+
+    return value is a graph represented as an adjacency list.
+    '''
+    graph = [None] * nodes
+    for i in xrange(nodes):
+        if i == 0 or rng.random() < rootprob:
+            graph[i] = [nullrev]
+        elif i == 1:
+            graph[i] = 
+        elif rng.random() < mergeprob:
+            if i == 2 or rng.random() < prevprob:
+                # p1 is prev
+                p1 = i - 1
+            else:
+                p1 = rng.randrange(i - 1)
+            p2 = rng.choice(range(0, p1) + range(p1 + 1, i))
+            graph[i] = [p1, p2]
+        elif rng.random() < prevprob:
+            graph[i] = [i - 1]
+        else:
+            graph[i] = [rng.randrange(i - 1)]
+
+    return graph
+
+def buildancestorsets(graph):
+    ancs = [None] * len(graph)
+    for i in xrange(len(graph)):
+        ancs[i] = set([i])
+        if graph[i] == [nullrev]:
+            continue
+        for p in graph[i]:
+            ancs[i].update(ancs[p])
+    return ancs
+
+def naivemissingancestors(ancs, revs, bases):
+    res = set()
+    for rev in revs:
+        if rev != nullrev:
+            res.update(ancs[rev])
+    for base in bases:
+        if base != nullrev:
+            res.difference_update(ancs[base])
+    return sorted(res)
+
+def test_missingancestors(seed, rng):
+    # empirically observed to take around 1 second
+    graphcount = 100
+    testcount = 100
+    nerrs = 
+    # the default mu and sigma give us a nice distribution of mostly
+    # single-digit counts (including 0) with some higher ones
+    def lognormrandom(mu, sigma):
+        return int(math.floor(rng.lognormvariate(mu, sigma)))
+
+    def samplerevs(nodes, mu=1.1, sigma=0.8):
+        count = min(lognormrandom(mu, sigma), len(nodes))
+        return rng.sample(nodes, count)
+
+    def err(seed, graph, bases, revs, output, expected):
+        if nerrs == 0:
+            print >> sys.stderr, 'seed:', hex(seed)[:-1]
+        if gerrs == 0:
+            print >> sys.stderr, 'graph:', graph
+        print >> sys.stderr, '* bases:', bases
+        print >> sys.stderr, '* revs: ', revs
+        print >> sys.stderr, '*  output:  ', output
+        print >> sys.stderr, '*  expected:', expected
+        nerrs += 1
+        gerrs += 1
+
+    for g in xrange(graphcount):
+        graph = buildgraph(rng)
+        ancs = buildancestorsets(graph)
+        gerrs = 
+        for _ in xrange(testcount):
+            # start from nullrev to include it as a possibility
+            graphnodes = range(nullrev, len(graph))
+            bases = samplerevs(graphnodes)
+            revs = samplerevs(graphnodes)
+
+            # fast algorithm
+            h = ancestor.missingancestors(revs, bases, graph.__getitem__)
+            # reference slow algorithm
+            r = naivemissingancestors(ancs, revs, bases)
+            if h != r:
+                err(seed, graph, bases, revs, h, r)

# graph is a dict of child->parent adjacency lists for this graph:
# o  13
@@ -32,43 +126,6 @@
graph = {0: [-1], 1: , 2: , 3: , 4: , 5: , 6: ,
7: , 8: [-1], 9: [6, 7], 10: , 11: [3, 7], 12: ,
13: }
-pfunc = graph.get
-
-def runmissingancestors(revs, bases):
-    print "%% ancestors of %s and not of %s" % (revs, bases)
-    print ancestor.missingancestors(revs, bases, pfunc)
-
-def test_missingancestors():
-    # Empty revs
-    runmissingancestors([], )
-    runmissingancestors([], [])
-
-    # If bases is empty, it's the same as if it were [nullrev]
-    runmissingancestors(, [])
-
-    # Trivial case: revs == bases
-    runmissingancestors(, )
-    runmissingancestors([4, 5, 6], [6, 5, 4])
-
-    # With nullrev
-    runmissingancestors([-1], )
-    runmissingancestors(, [-1])
-
-    # 9 is a parent of 12. 7 is a parent of 9, so an ancestor of 12. 6 is an
-    # ancestor of 12 but not of 7.
-    runmissingancestors(, )
-    runmissingancestors(, )
-    runmissingancestors([12, 9], )
-    runmissingancestors([7, 6], )
-
-    # More complex cases
-    runmissingancestors(, [11, 12])
-    runmissingancestors(, )
-    runmissingancestors(, [10, 12])
-    runmissingancestors(, )
-    runmissingancestors(, )
-    runmissingancestors([10, 11, 12], )
-    runmissingancestors(, [10, 11, 12])

def genlazyancestors(revs, stoprev=0, inclusive=False):
print ("%% lazy ancestor set for %s, stoprev = %s, inclusive = %s" %
@@ -133,7 +190,20 @@
print "  Python returned: %s" % pygcas

def main():
-    test_missingancestors()
+    seed = None
+    opts, args = getopt.getopt(sys.argv[1:], 's:', ['seed='])
+    for o, a in opts:
+        if o in ('-s', '--seed'):
+            seed = long(a, base=0) # accepts base 10 or 16 strings
+
+    if seed is None:
+        try:
+            seed = long(binascii.hexlify(os.urandom(16)), 16)
+        except AttributeError:
+            seed = long(time.time() * 1000)
+
+    rng = random.Random(seed)
+    test_missingancestors(seed, rng)
test_lazyancestors()
test_gca()

diff --git a/tests/test-ancestor.py.out b/tests/test-ancestor.py.out
--- a/tests/test-ancestor.py.out
+++ b/tests/test-ancestor.py.out
@@ -1,39 +1,3 @@
-% ancestors of [] and not of 
-[]
-% ancestors of [] and not of []
-[]
-% ancestors of  and not of []
-[0, 1, 2, 4, 6, 7, 9, 12]
-% ancestors of  and not of 
-[]
-% ancestors of [4, 5, 6] and not of [6, 5, 4]
-[]
-% ancestors of [-1] and not of 
-[]
-% ancestors of  and not of [-1]
-[0, 1, 2, 4, 6, 7, 9, 12]
-% ancestors of  and not of 
-
-% ancestors of  and not of 
-[]
-% ancestors of [12, 9] and not of 
-[6, 9, 12]
-% ancestors of [7, 6] and not of 
-[]
-% ancestors of  and not of [11, 12]
-[5, 10]
-% ancestors of  and not of 
-[3, 7, 11]
-% ancestors of  and not of [10, 12]
-[3, 11]
-% ancestors of  and not of 
-[6, 7, 9, 12]
-% ancestors of  and not of 
-[6, 9, 12]
-% ancestors of [10, 11, 12] and not of 
-[0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12]
-% ancestors of  and not of [10, 11, 12]
-[8, 13]
% lazy ancestor set for [], stoprev = 0, inclusive = False
membership: []
iteration:  []

```