Patchwork [4,of,6,STABLE] bdiff: fix latent normalization bug

login
register
mail settings
Submitter Matt Mackall
Date April 23, 2016, 3:41 p.m.
Message ID <7d0c61d568ac831fd59f.1461426077@ruin.waste.org>
Download mbox | patch
Permalink /patch/14772/
State Superseded
Commit 4bd67ae7d75a40b50d45c8c043b5de80873844cc
Headers show

Comments

Matt Mackall - April 23, 2016, 3:41 p.m.
# HG changeset patch
# User Matt Mackall <mpm@selenic.com>
# Date 1461293598 18000
#      Thu Apr 21 21:53:18 2016 -0500
# Branch stable
# Node ID 7d0c61d568ac831fd59f6aaf480418acedae1eda
# Parent  48c7e086444c9dc7a80fa3d24b282ad345c07a33
bdiff: fix latent normalization bug

This bug is hidden by the current bias towards matches at the
beginning of the file. When this bias is tweaked later to address
recursion balancing, the normalization code could cause the next block
to shrink to a negative length, thus creating invalid delta chunks. We
add checks here to disallow that.

This bug requires test cases that are an awkwardly large size for the test
suite, but is very rapidly picked up by the included torture tester.
Yuya Nishihara - April 25, 2016, 3:43 p.m.
On Sat, 23 Apr 2016 10:41:17 -0500, Matt Mackall wrote:
> # HG changeset patch
> # User Matt Mackall <mpm@selenic.com>
> # Date 1461293598 18000
> #      Thu Apr 21 21:53:18 2016 -0500
> # Branch stable
> # Node ID 7d0c61d568ac831fd59f6aaf480418acedae1eda
> # Parent  48c7e086444c9dc7a80fa3d24b282ad345c07a33
> bdiff: fix latent normalization bug

> +++ b/contrib/bdiff-torture.py	Thu Apr 21 21:53:18 2016 -0500
> @@ -0,0 +1,98 @@
> +# Randomized torture test generation for bdiff
> +
> +from __future__ import absolute_import, print_function
> +import struct, random, sys

test-check-pyflakes.t said hi.

+  contrib/bdiff-torture.py:4: 'struct' imported but unused

Patch

diff -r 48c7e086444c -r 7d0c61d568ac contrib/bdiff-torture.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/bdiff-torture.py	Thu Apr 21 21:53:18 2016 -0500
@@ -0,0 +1,98 @@ 
+# Randomized torture test generation for bdiff
+
+from __future__ import absolute_import, print_function
+import struct, random, sys
+from mercurial import (
+    bdiff,
+    mpatch,
+)
+
+def reducetest(a, b):
+    tries = 0
+    reductions = 0
+    print("reducing...")
+    while tries < 1000:
+        a2 = "\n".join(l for l in a.splitlines()
+                       if random.randint(0, 100) > 0) + "\n"
+        b2 = "\n".join(l for l in b.splitlines()
+                       if random.randint(0, 100) > 0) + "\n"
+        if a2 == a and b2 == b:
+            continue
+        if a2 == b2:
+            continue
+        tries += 1
+
+        try:
+            test1(a, b)
+        except Exception as inst:
+            reductions += 1
+            tries = 0
+            a = a2
+            b = b2
+
+    print("reduced:", reductions, len(a) + len(b),
+          repr(a), repr(b))
+    try:
+        test1(a, b)
+    except Exception as inst:
+        print("failed:", inst)
+
+    sys.exit(0)
+
+def test1(a, b):
+    d = bdiff.bdiff(a, b)
+    if not d:
+        raise ValueError("empty")
+    c = mpatch.patches(a, [d])
+    if c != b:
+        raise ValueError("bad")
+
+def testwrap(a, b):
+    try:
+        test1(a, b)
+        return
+    except Exception as inst:
+        pass
+    print("exception:", inst)
+    reducetest(a, b)
+
+def test(a, b):
+    testwrap(a, b)
+    testwrap(b, a)
+
+def rndtest(size, noise):
+    a = []
+    src = "                aaaaaaaabbbbccd"
+    for x in xrange(size):
+        a.append(src[random.randint(0, len(src) - 1)])
+
+    while True:
+        b = [c for c in a if random.randint(0, 99) > noise]
+        b2 = []
+        for c in b:
+            b2.append(c)
+            while random.randint(0, 99) < noise:
+                b2.append(src[random.randint(0, len(src) - 1)])
+        if b2 != a:
+            break
+
+    a = "\n".join(a) + "\n"
+    b = "\n".join(b2) + "\n"
+
+    test(a, b)
+
+maxvol = 10000
+startsize = 2
+while True:
+    size = startsize
+    count = 0
+    while size < maxvol:
+        print(size)
+        volume = 0
+        while volume < maxvol:
+            rndtest(size, 2)
+            volume += size
+            count += 2
+        size *= 2
+    maxvol *= 4
+    startsize *= 4
diff -r 48c7e086444c -r 7d0c61d568ac mercurial/bdiff.c
--- a/mercurial/bdiff.c	Thu Apr 21 21:46:31 2016 -0500
+++ b/mercurial/bdiff.c	Thu Apr 21 21:53:18 2016 -0500
@@ -265,6 +265,8 @@ 
 
 		if (curr->a2 == next->a1 || curr->b2 == next->b1)
 			while (curr->a2 < an && curr->b2 < bn
+			       && next->a1 < next->a2
+			       && next->b1 < next->b2
 			       && !cmp(a + curr->a2, b + curr->b2)) {
 				curr->a2++;
 				next->a1++;