Patchwork bdiff: implement cffi version of bdiff

login
register
mail settings
Submitter Maciej Fijalkowski
Date Aug. 20, 2016, 9:06 p.m.
Message ID <02c99939ee6a6157de65.1471727192@brick.arcode.com>
Download mbox | patch
Permalink /patch/16366/
State Accepted
Headers show

Comments

Maciej Fijalkowski - Aug. 20, 2016, 9:06 p.m.
# HG changeset patch
# User Maciej Fijalkowski <fijall@gmail.com>
# Date 1471727161 -7200
#      Sat Aug 20 23:06:01 2016 +0200
# Node ID 02c99939ee6a6157de657bf8fffb7b998cd827b0
# Parent  17bb55a4276ea6d85e10c128fa2b83c7e170e9de
bdiff: implement cffi version of bdiff
Yuya Nishihara - Aug. 24, 2016, 1:20 p.m.
On Sat, 20 Aug 2016 23:06:32 +0200, Maciej Fijalkowski wrote:
> # HG changeset patch
> # User Maciej Fijalkowski <fijall@gmail.com>
> # Date 1471727161 -7200
> #      Sat Aug 20 23:06:01 2016 +0200
> # Node ID 02c99939ee6a6157de657bf8fffb7b998cd827b0
> # Parent  17bb55a4276ea6d85e10c128fa2b83c7e170e9de
> bdiff: implement cffi version of bdiff

Looks good per comparison with bdiff_module.c.
Queued, thanks.

> +        def bdiff(sa, sb):
> +            a = ffi.new("struct bdiff_line**")
> +            b = ffi.new("struct bdiff_line**")
> +            ac = ffi.new("char[]", sa)
> +            bc = ffi.new("char[]", sb)
> +            l = ffi.new("struct bdiff_hunk*")
> +            try:
> +                an = lib.bdiff_splitlines(ac, len(sa), a)
> +                bn = lib.bdiff_splitlines(bc, len(sb), b)
> +                if not a[0] or not b[0]:
> +                    raise MemoryError
> +                count = lib.bdiff_diff(a[0], an, b[0], bn, l)
> +                if count < 0:
> +                    raise MemoryError
> +                rl = []
> +                h = l.next
> +                la = lb = 0
> +                while h:
> +                    if h.a1 != la or h.b1 != lb:
> +                        lgt = (b[0] + h.b1).l - (b[0] + lb).l
> +                        rl.append(struct.pack(">lll", (a[0] + la).l - a[0].l,
> +                            (a[0] + h.a1).l - a[0].l, lgt))

This appears not to agree with the C implementation [1], in which the maximum
length is UINT_MAX, but the pure code always packs/unpacks them as signed
integers, so ">lll" should be right.

 [1]: https://selenic.com/repo/hg/rev/d6fb7bbec16a

Patch

diff --git a/mercurial/pure/bdiff.py b/mercurial/pure/bdiff.py
--- a/mercurial/pure/bdiff.py
+++ b/mercurial/pure/bdiff.py
@@ -83,7 +83,6 @@ 
             bin.append(struct.pack(">lll", p[la], p[am], len(s)) + s)
         la = am + size
         lb = bm + size
-
     return "".join(bin)
 
 def blocks(a, b):
@@ -113,12 +112,12 @@ 
             b = ffi.new("struct bdiff_line**")
             ac = ffi.new("char[]", sa)
             bc = ffi.new("char[]", sb)
+            l = ffi.new("struct bdiff_hunk*")
             try:
                 an = lib.bdiff_splitlines(ac, len(sa), a)
                 bn = lib.bdiff_splitlines(bc, len(sb), b)
                 if not a[0] or not b[0]:
                     raise MemoryError
-                l = ffi.new("struct bdiff_hunk*")
                 count = lib.bdiff_diff(a[0], an, b[0], bn, l)
                 if count < 0:
                     raise MemoryError
@@ -134,3 +133,36 @@ 
                 lib.free(b[0])
                 lib.bdiff_freehunks(l.next)
             return rl
+
+        def bdiff(sa, sb):
+            a = ffi.new("struct bdiff_line**")
+            b = ffi.new("struct bdiff_line**")
+            ac = ffi.new("char[]", sa)
+            bc = ffi.new("char[]", sb)
+            l = ffi.new("struct bdiff_hunk*")
+            try:
+                an = lib.bdiff_splitlines(ac, len(sa), a)
+                bn = lib.bdiff_splitlines(bc, len(sb), b)
+                if not a[0] or not b[0]:
+                    raise MemoryError
+                count = lib.bdiff_diff(a[0], an, b[0], bn, l)
+                if count < 0:
+                    raise MemoryError
+                rl = []
+                h = l.next
+                la = lb = 0
+                while h:
+                    if h.a1 != la or h.b1 != lb:
+                        lgt = (b[0] + h.b1).l - (b[0] + lb).l
+                        rl.append(struct.pack(">lll", (a[0] + la).l - a[0].l,
+                            (a[0] + h.a1).l - a[0].l, lgt))
+                        rl.append(str(ffi.buffer((b[0] + lb).l, lgt)))
+                    la = h.a2
+                    lb = h.b2
+                    h = h.next
+
+            finally:
+                lib.free(a[0])
+                lib.free(b[0])
+                lib.bdiff_freehunks(l.next)
+            return "".join(rl)