Patchwork [more-in-stack] changegroup: move all compressions utilities in util

login
register
mail settings
Submitter Pierre-Yves David
Date Sept. 16, 2015, 12:50 a.m.
Message ID <8bcc409883f9f5a7448a.1442364657@marginatus.alto.octopoid.net>
Download mbox | patch
Permalink /patch/10512/
State Superseded
Headers show

Comments

Pierre-Yves David - Sept. 16, 2015, 12:50 a.m.
# HG changeset patch
# User Pierre-Yves David <pierre-yves.david@fb.com>
# Date 1442363732 25200
#      Tue Sep 15 17:35:32 2015 -0700
# Node ID 8bcc409883f9f5a7448a628bfe28da597c49a7ad
# Parent  7df5d476087392e217699a41c11fbe8cd48713b2
changegroup: move all compressions utilities in util

We'll reuse the compression for other things (next target bundle2), so let's
make it more accessible and organised.
Yuya Nishihara - Sept. 16, 2015, 3:18 p.m.
On Tue, 15 Sep 2015 17:50:57 -0700, Pierre-Yves David wrote:
> # HG changeset patch
> # User Pierre-Yves David <pierre-yves.david@fb.com>
> # Date 1442363732 25200
> #      Tue Sep 15 17:35:32 2015 -0700
> # Node ID 8bcc409883f9f5a7448a628bfe28da597c49a7ad
> # Parent  7df5d476087392e217699a41c11fbe8cd48713b2
> changegroup: move all compressions utilities in util
> 
> We'll reuse the compression for other things (next target bundle2), so let's
> make it more accessible and organised.
> 
> diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py
> --- a/mercurial/changegroup.py
> +++ b/mercurial/changegroup.py
> @@ -5,16 +5,14 @@
>  # This software may be used and distributed according to the terms of the
>  # GNU General Public License version 2 or any later version.
>  
>  from __future__ import absolute_import
>  
> -import bz2
>  import os
>  import struct
>  import tempfile
>  import weakref
> -import zlib
>  
>  from .i18n import _
>  from .node import (
>      hex,
>      nullid,
> @@ -79,24 +77,18 @@ def combineresults(results):
>          result = 1 + changedheads
>      elif changedheads < 0:
>          result = -1 + changedheads
>      return result
>  
> -class nocompress(object):
> -    def compress(self, x):
> -        return x
> -    def flush(self):
> -        return ""
> -
>  bundletypes = {
> -    "": ("", nocompress), # only when using unbundle on ssh and old http servers
> +    "": ("", None), # only when using unbundle on ssh and old http servers
>                            # since the unification ssh accepts a header but there
>                            # is no capability signaling it.

Guessing from the comment, it should be 'UN' type.

>      "HG20": (), # special-cased below
> -    "HG10UN": ("HG10UN", nocompress),
> -    "HG10BZ": ("HG10", lambda: bz2.BZ2Compressor()),
> -    "HG10GZ": ("HG10GZ", lambda: zlib.compressobj()),
> +    "HG10UN": ("HG10UN", 'UN'),
> +    "HG10BZ": ("HG10", 'BZ'),
> +    "HG10GZ": ("HG10GZ", 'GZ'),
Pierre-Yves David - Sept. 16, 2015, 5:33 p.m.
On 09/16/2015 08:18 AM, Yuya Nishihara wrote:
> On Tue, 15 Sep 2015 17:50:57 -0700, Pierre-Yves David wrote:
>> # HG changeset patch
>> # User Pierre-Yves David <pierre-yves.david@fb.com>
>> # Date 1442363732 25200
>> #      Tue Sep 15 17:35:32 2015 -0700
>> # Node ID 8bcc409883f9f5a7448a628bfe28da597c49a7ad
>> # Parent  7df5d476087392e217699a41c11fbe8cd48713b2
>> changegroup: move all compressions utilities in util
>>
>> We'll reuse the compression for other things (next target bundle2), so let's
>> make it more accessible and organised.
>>
>> diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py
>> --- a/mercurial/changegroup.py
>> +++ b/mercurial/changegroup.py
>> @@ -5,16 +5,14 @@
>>   # This software may be used and distributed according to the terms of the
>>   # GNU General Public License version 2 or any later version.
>>
>>   from __future__ import absolute_import
>>
>> -import bz2
>>   import os
>>   import struct
>>   import tempfile
>>   import weakref
>> -import zlib
>>
>>   from .i18n import _
>>   from .node import (
>>       hex,
>>       nullid,
>> @@ -79,24 +77,18 @@ def combineresults(results):
>>           result = 1 + changedheads
>>       elif changedheads < 0:
>>           result = -1 + changedheads
>>       return result
>>
>> -class nocompress(object):
>> -    def compress(self, x):
>> -        return x
>> -    def flush(self):
>> -        return ""
>> -
>>   bundletypes = {
>> -    "": ("", nocompress), # only when using unbundle on ssh and old http servers
>> +    "": ("", None), # only when using unbundle on ssh and old http servers
>>                             # since the unification ssh accepts a header but there
>>                             # is no capability signaling it.
>
> Guessing from the comment, it should be 'UN' type.

nice catch.

Patch

diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py
--- a/mercurial/changegroup.py
+++ b/mercurial/changegroup.py
@@ -5,16 +5,14 @@ 
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
 from __future__ import absolute_import
 
-import bz2
 import os
 import struct
 import tempfile
 import weakref
-import zlib
 
 from .i18n import _
 from .node import (
     hex,
     nullid,
@@ -79,24 +77,18 @@  def combineresults(results):
         result = 1 + changedheads
     elif changedheads < 0:
         result = -1 + changedheads
     return result
 
-class nocompress(object):
-    def compress(self, x):
-        return x
-    def flush(self):
-        return ""
-
 bundletypes = {
-    "": ("", nocompress), # only when using unbundle on ssh and old http servers
+    "": ("", None), # only when using unbundle on ssh and old http servers
                           # since the unification ssh accepts a header but there
                           # is no capability signaling it.
     "HG20": (), # special-cased below
-    "HG10UN": ("HG10UN", nocompress),
-    "HG10BZ": ("HG10", lambda: bz2.BZ2Compressor()),
-    "HG10GZ": ("HG10GZ", lambda: zlib.compressobj()),
+    "HG10UN": ("HG10UN", 'UN'),
+    "HG10BZ": ("HG10", 'BZ'),
+    "HG10GZ": ("HG10GZ", 'GZ'),
 }
 
 # hgweb uses this list to communicate its preferred type
 bundlepriority = ['HG10GZ', 'HG10BZ', 'HG10UN']
 
@@ -125,19 +117,22 @@  def writebundle(ui, cg, filename, bundle
         if bundletype == "HG20":
             from . import bundle2
             bundle = bundle2.bundle20(ui)
             part = bundle.newpart('changegroup', data=cg.getchunks())
             part.addparam('version', cg.version)
-            z = nocompress()
+            z = util.compressors['UN']()
             chunkiter = bundle.getchunks()
         else:
             if cg.version != '01':
                 raise util.Abort(_('old bundle types only supports v1 '
                                    'changegroups'))
-            header, compressor = bundletypes[bundletype]
+            header, comp = bundletypes[bundletype]
             fh.write(header)
-            z = compressor()
+            if comp not in util.compressors:
+                raise util.Abort(_('unknown stream compression type: %s')
+                                 % comp)
+            z = util.compressors[comp]()
             chunkiter = cg.getchunks()
 
         # parse the changegroup data, otherwise we will block
         # in case of sshrepo because we don't know the end of the stream
 
@@ -156,34 +151,19 @@  def writebundle(ui, cg, filename, bundle
             if filename and vfs:
                 vfs.unlink(cleanup)
             else:
                 os.unlink(cleanup)
 
-def decompressor(fh, alg):
-    if alg == 'UN':
-        return fh
-    elif alg == 'GZ':
-        def generator(f):
-            zd = zlib.decompressobj()
-            for chunk in util.filechunkiter(f):
-                yield zd.decompress(chunk)
-    elif alg == 'BZ':
-        def generator(f):
-            zd = bz2.BZ2Decompressor()
-            zd.decompress("BZ")
-            for chunk in util.filechunkiter(f, 4096):
-                yield zd.decompress(chunk)
-    else:
-        raise util.Abort("unknown bundle compression '%s'" % alg)
-    return util.chunkbuffer(generator(fh))
-
 class cg1unpacker(object):
     deltaheader = _CHANGEGROUPV1_DELTA_HEADER
     deltaheadersize = struct.calcsize(deltaheader)
     version = '01'
     def __init__(self, fh, alg):
-        self._stream = decompressor(fh, alg)
+        if not alg in util.decompressors:
+            raise util.Abort(_('unknown stream compression type: %s')
+                             % alg)
+        self._stream = util.decompressors[alg](fh)
         self._type = alg
         self.callback = None
     def compressed(self):
         return self._type != 'UN'
     def read(self, l):
diff --git a/mercurial/util.py b/mercurial/util.py
--- a/mercurial/util.py
+++ b/mercurial/util.py
@@ -19,10 +19,12 @@  import error, osutil, encoding, parsers
 import errno, shutil, sys, tempfile, traceback
 import re as remod
 import os, time, datetime, calendar, textwrap, signal, collections
 import imp, socket, urllib
 import gc
+import bz2
+import zlib
 
 if os.name == 'nt':
     import windows as platform
 else:
     import posix as platform
@@ -2336,7 +2338,43 @@  def finddirs(path):
     pos = path.rfind('/')
     while pos != -1:
         yield path[:pos]
         pos = path.rfind('/', 0, pos)
 
+# compression utility
+
+class nocompress(object):
+    def compress(self, x):
+        return x
+    def flush(self):
+        return ""
+
+compressors = {
+    'UN': nocompress,
+    # lambda to prevent early import
+    'BZ': lambda: bz2.BZ2Compressor(),
+    'GZ': lambda: zlib.compressobj(),
+    }
+
+def _makedecompressor(decompcls):
+    def generator(f):
+        d = decompcls()
+        for chunk in filechunkiter(f):
+            yield d.decompress(chunk)
+    def func(fh):
+        return chunkbuffer(generator(fh))
+    return func
+
+def _bz2():
+    d = bz2.BZ2Decompressor()
+    # Bzip2 stream start with BZ, but we stripped it.
+    # we put it back for good measure.
+    d.decompress('BZ')
+    return d
+
+decompressors = {'UN': lambda fh: fh,
+                 'BZ': _makedecompressor(_bz2),
+                 'GZ': _makedecompressor(lambda: zlib.decompressobj()),
+                 }
+
 # convenient shortcut
 dst = debugstacktrace