Patchwork patch: add support for git delta hunks

login
register
mail settings
Submitter 'Nicolas Vigier
Date Nov. 26, 2013, 11:17 a.m.
Message ID <f8cb3b94a9f68dcca3d9.1385464675@localhost>
Download mbox | patch
Permalink /patch/3153/
State Superseded
Commit 9f1d4323c749a8b902095f15587a8d0ec843e1d5
Headers show

Comments

'Nicolas Vigier - Nov. 26, 2013, 11:17 a.m.
# HG changeset patch
# User Nicolas Vigier <boklm@mars-attacks.org>
# Date 1385424419 -3600
# Node ID f8cb3b94a9f68dcca3d9545b14a7ecc21e8ba156
# Parent  1c46b18b0e1c47fa4cecf21b78c083a54ae9903f
patch: add support for git delta hunks

When creating patches modifying binary files using "git format-patch",
git creates 'literal' and 'delta' hunks. Mercurial currently supports
'literal' hunks only, which makes it impossible to import patches with
'delta' hunks.

This changeset adds support for 'delta' hunks. It is a reimplementation
of patch-delta.c from git :
http://git.kernel.org/cgit/git/git.git/tree/patch-delta.c
Augie Fackler - Nov. 27, 2013, 2:53 p.m.
On Tue, Nov 26, 2013 at 12:17:55PM +0100, Nicolas Vigier wrote:
> # HG changeset patch
> # User Nicolas Vigier <boklm@mars-attacks.org>
> # Date 1385424419 -3600
> # Node ID f8cb3b94a9f68dcca3d9545b14a7ecc21e8ba156
> # Parent  1c46b18b0e1c47fa4cecf21b78c083a54ae9903f
> patch: add support for git delta hunks
>
> When creating patches modifying binary files using "git format-patch",
> git creates 'literal' and 'delta' hunks. Mercurial currently supports
> 'literal' hunks only, which makes it impossible to import patches with
> 'delta' hunks.
>
> This changeset adds support for 'delta' hunks. It is a reimplementation
> of patch-delta.c from git :
> http://git.kernel.org/cgit/git/git.git/tree/patch-delta.c
>
> diff -r 1c46b18b0e1c -r f8cb3b94a9f6 mercurial/patch.py
> --- a/mercurial/patch.py	Fri Nov 22 17:26:58 2013 -0600
> +++ b/mercurial/patch.py	Tue Nov 26 01:06:59 2013 +0100
> @@ -721,8 +721,9 @@
>              if self.remove:
>                  self.backend.unlink(self.fname)
>              else:
> -                self.lines[:] = h.new()
> -                self.offset += len(h.new())
> +                l = h.new(self.lines)
> +                self.lines[:] = l
> +                self.offset += len(l)
>                  self.dirty = True
>              return 0
>
> @@ -1016,9 +1017,10 @@
>          return old, oldstart, new, newstart
>
>  class binhunk(object):
> -    'A binary patch file. Only understands literals so far.'
> +    'A binary patch file.'
>      def __init__(self, lr, fname):
>          self.text = None
> +        self.delta = False
>          self.hunk = ['GIT binary patch\n']
>          self._fname = fname
>          self._read(lr)
> @@ -1026,8 +1028,63 @@
>      def complete(self):
>          return self.text is not None
>
> -    def new(self):
> -        return [self.text]
> +    def new(self, lines):
> +        def deltahead(binchunk):
> +            i = 0
> +            for c in binchunk:
> +                i += 1
> +                if not (ord(c) & 0x80):
> +                    return i
> +            return i
> +        def applydelta(binchunk, data):

I may be missing something, but it look slike this nested function
could be a module-level function and then be a little more readable?

> +            out = ""
> +            s = deltahead(binchunk)
> +            binchunk = binchunk[s:]
> +            s = deltahead(binchunk)
> +            binchunk = binchunk[s:]
> +            i = 0
> +            while i < len(binchunk):
> +                cmd = ord(binchunk[i])
> +                i += 1
> +                if (cmd & 0x80):
> +                    offset = 0
> +                    size = 0
> +                    if (cmd & 0x01):
> +                        offset = ord(binchunk[i])
> +                        i += 1
> +                    if (cmd & 0x02):
> +                        offset |= ord(binchunk[i]) << 8
> +                        i += 1
> +                    if (cmd & 0x04):
> +                        offset |= ord(binchunk[i]) << 16
> +                        i += 1
> +                    if (cmd & 0x08):
> +                        offset |= ord(binchunk[i]) << 24
> +                        i += 1
> +                    if (cmd & 0x10):
> +                        size = ord(binchunk[i])
> +                        i += 1
> +                    if (cmd & 0x20):
> +                        size |= ord(binchunk[i]) << 8
> +                        i += 1
> +                    if (cmd & 0x40):
> +                        size |= ord(binchunk[i]) << 16
> +                        i += 1
> +                    if size == 0:
> +                        size = 0x10000
> +                    offset_end = offset + size
> +                    out += data[offset:offset_end]
> +                elif cmd != 0:
> +                    offset_end = i + cmd
> +                    out += binchunk[i:offset_end]
> +                    i += cmd
> +                else:
> +                    raise PatchError(_('unexpected delta opcode 0'))
> +            return out
> +        if self.delta:
> +            return [applydelta(self.text, ''.join(lines))]
> +        else:
> +            return [self.text]
>
>      def _read(self, lr):
>          def getline(lr, hunk):
> @@ -1035,14 +1092,19 @@
>              hunk.append(l)
>              return l.rstrip('\r\n')
>
> +        size = 0
>          while True:
>              line = getline(lr, self.hunk)
>              if not line:
>                  raise PatchError(_('could not extract "%s" binary data')
>                                   % self._fname)
>              if line.startswith('literal '):
> +                size = int(line[8:].rstrip())
>                  break
> -        size = int(line[8:].rstrip())
> +            if line.startswith('delta '):
> +                size = int(line[6:].rstrip())
> +                self.delta = True
> +                break
>          dec = []
>          line = getline(lr, self.hunk)
>          while len(line) > 1:
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel
'Nicolas Vigier - Nov. 27, 2013, 3:39 p.m.
On Wed, 27 Nov 2013, Augie Fackler wrote:

> On Tue, Nov 26, 2013 at 12:17:55PM +0100, Nicolas Vigier wrote:
> > # HG changeset patch
> > # User Nicolas Vigier <boklm@mars-attacks.org>
> > # Date 1385424419 -3600
> > # Node ID f8cb3b94a9f68dcca3d9545b14a7ecc21e8ba156
> > # Parent  1c46b18b0e1c47fa4cecf21b78c083a54ae9903f
> > patch: add support for git delta hunks
> >
> > When creating patches modifying binary files using "git format-patch",
> > git creates 'literal' and 'delta' hunks. Mercurial currently supports
> > 'literal' hunks only, which makes it impossible to import patches with
> > 'delta' hunks.
> >
> > This changeset adds support for 'delta' hunks. It is a reimplementation
> > of patch-delta.c from git :
> > http://git.kernel.org/cgit/git/git.git/tree/patch-delta.c
> >
> > diff -r 1c46b18b0e1c -r f8cb3b94a9f6 mercurial/patch.py
> > --- a/mercurial/patch.py	Fri Nov 22 17:26:58 2013 -0600
> > +++ b/mercurial/patch.py	Tue Nov 26 01:06:59 2013 +0100
> > @@ -721,8 +721,9 @@
> >              if self.remove:
> >                  self.backend.unlink(self.fname)
> >              else:
> > -                self.lines[:] = h.new()
> > -                self.offset += len(h.new())
> > +                l = h.new(self.lines)
> > +                self.lines[:] = l
> > +                self.offset += len(l)
> >                  self.dirty = True
> >              return 0
> >
> > @@ -1016,9 +1017,10 @@
> >          return old, oldstart, new, newstart
> >
> >  class binhunk(object):
> > -    'A binary patch file. Only understands literals so far.'
> > +    'A binary patch file.'
> >      def __init__(self, lr, fname):
> >          self.text = None
> > +        self.delta = False
> >          self.hunk = ['GIT binary patch\n']
> >          self._fname = fname
> >          self._read(lr)
> > @@ -1026,8 +1028,63 @@
> >      def complete(self):
> >          return self.text is not None
> >
> > -    def new(self):
> > -        return [self.text]
> > +    def new(self, lines):
> > +        def deltahead(binchunk):
> > +            i = 0
> > +            for c in binchunk:
> > +                i += 1
> > +                if not (ord(c) & 0x80):
> > +                    return i
> > +            return i
> > +        def applydelta(binchunk, data):
> 
> I may be missing something, but it look slike this nested function
> could be a module-level function and then be a little more readable?

Indeed, this could be a module-level function. I'll send a new patch.

Patch

diff -r 1c46b18b0e1c -r f8cb3b94a9f6 mercurial/patch.py
--- a/mercurial/patch.py	Fri Nov 22 17:26:58 2013 -0600
+++ b/mercurial/patch.py	Tue Nov 26 01:06:59 2013 +0100
@@ -721,8 +721,9 @@ 
             if self.remove:
                 self.backend.unlink(self.fname)
             else:
-                self.lines[:] = h.new()
-                self.offset += len(h.new())
+                l = h.new(self.lines)
+                self.lines[:] = l
+                self.offset += len(l)
                 self.dirty = True
             return 0
 
@@ -1016,9 +1017,10 @@ 
         return old, oldstart, new, newstart
 
 class binhunk(object):
-    'A binary patch file. Only understands literals so far.'
+    'A binary patch file.'
     def __init__(self, lr, fname):
         self.text = None
+        self.delta = False
         self.hunk = ['GIT binary patch\n']
         self._fname = fname
         self._read(lr)
@@ -1026,8 +1028,63 @@ 
     def complete(self):
         return self.text is not None
 
-    def new(self):
-        return [self.text]
+    def new(self, lines):
+        def deltahead(binchunk):
+            i = 0
+            for c in binchunk:
+                i += 1
+                if not (ord(c) & 0x80):
+                    return i
+            return i
+        def applydelta(binchunk, data):
+            out = ""
+            s = deltahead(binchunk)
+            binchunk = binchunk[s:]
+            s = deltahead(binchunk)
+            binchunk = binchunk[s:]
+            i = 0
+            while i < len(binchunk):
+                cmd = ord(binchunk[i])
+                i += 1
+                if (cmd & 0x80):
+                    offset = 0
+                    size = 0
+                    if (cmd & 0x01):
+                        offset = ord(binchunk[i])
+                        i += 1
+                    if (cmd & 0x02):
+                        offset |= ord(binchunk[i]) << 8
+                        i += 1
+                    if (cmd & 0x04):
+                        offset |= ord(binchunk[i]) << 16
+                        i += 1
+                    if (cmd & 0x08):
+                        offset |= ord(binchunk[i]) << 24
+                        i += 1
+                    if (cmd & 0x10):
+                        size = ord(binchunk[i])
+                        i += 1
+                    if (cmd & 0x20):
+                        size |= ord(binchunk[i]) << 8
+                        i += 1
+                    if (cmd & 0x40):
+                        size |= ord(binchunk[i]) << 16
+                        i += 1
+                    if size == 0:
+                        size = 0x10000
+                    offset_end = offset + size
+                    out += data[offset:offset_end]
+                elif cmd != 0:
+                    offset_end = i + cmd
+                    out += binchunk[i:offset_end]
+                    i += cmd
+                else:
+                    raise PatchError(_('unexpected delta opcode 0'))
+            return out
+        if self.delta:
+            return [applydelta(self.text, ''.join(lines))]
+        else:
+            return [self.text]
 
     def _read(self, lr):
         def getline(lr, hunk):
@@ -1035,14 +1092,19 @@ 
             hunk.append(l)
             return l.rstrip('\r\n')
 
+        size = 0
         while True:
             line = getline(lr, self.hunk)
             if not line:
                 raise PatchError(_('could not extract "%s" binary data')
                                  % self._fname)
             if line.startswith('literal '):
+                size = int(line[8:].rstrip())
                 break
-        size = int(line[8:].rstrip())
+            if line.startswith('delta '):
+                size = int(line[6:].rstrip())
+                self.delta = True
+                break
         dec = []
         line = getline(lr, self.hunk)
         while len(line) > 1: