Patchwork [V2,more-in-stack] changegroup: move all compression utilities in util

Submitter Pierre-Yves David
Date Sept. 16, 2015, 8:39 p.m.
Message ID <b372d84c715816cb8dce.1442435972@marginatus.alto.octopoid.net>
Permalink /patch/10515/
State Accepted

Comments

Pierre-Yves David - Sept. 16, 2015, 8:39 p.m.
# HG changeset patch
# User Pierre-Yves David <pierre-yves.david@fb.com>
# Date 1442363732 25200
#      Tue Sep 15 17:35:32 2015 -0700
# Node ID b372d84c715816cb8dcec8431a6e1760bdd101a7
# Parent  7df5d476087392e217699a41c11fbe8cd48713b2
changegroup: move all compression utilities in util

We'll reuse the compression code for other things (next target: bundle2), so let's
make it more accessible and organised.
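
For illustration, here is a minimal, self-contained sketch of the compressor
registry this patch introduces in util.py (Python 2 era, to match the patch;
the names mirror the diff below, but this sketch is not part of the patch
itself):

    import bz2
    import zlib

    class nocompress(object):
        def compress(self, x):
            return x
        def flush(self):
            return ""

    # two-letter bundle type -> factory returning an object with
    # compress()/flush(), mirroring the util.compressors dict in the patch
    compressors = {
        'UN': nocompress,
        'BZ': lambda: bz2.BZ2Compressor(),
        'GZ': lambda: zlib.compressobj(),
    }

    def compressall(alg, data):
        # callers look the engine up by type, as changegroup.writebundle
        # does after this patch, instead of hard-coding bz2/zlib
        if alg not in compressors:
            raise KeyError('unknown stream compression type: %s' % alg)
        z = compressors[alg]()
        return z.compress(data) + z.flush()

    compressall('GZ', 'some changegroup bytes')
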
Matt Mackall - Sept. 17, 2015, 11:43 p.m.
On Wed, 2015-09-16 at 13:39 -0700, Pierre-Yves David wrote:
> # HG changeset patch
> # User Pierre-Yves David <pierre-yves.david@fb.com>
> # Date 1442363732 25200
> #      Tue Sep 15 17:35:32 2015 -0700
> # Node ID b372d84c715816cb8dcec8431a6e1760bdd101a7
> # Parent  7df5d476087392e217699a41c11fbe8cd48713b2
> changegroup: move all compression utilities in util

Queued for default, thanks.

Patch

diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py
--- a/mercurial/changegroup.py
+++ b/mercurial/changegroup.py
@@ -5,16 +5,14 @@ 
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
 from __future__ import absolute_import
 
-import bz2
 import os
 import struct
 import tempfile
 import weakref
-import zlib
 
 from .i18n import _
 from .node import (
     hex,
     nullid,
@@ -79,24 +77,18 @@  def combineresults(results):
         result = 1 + changedheads
     elif changedheads < 0:
         result = -1 + changedheads
     return result
 
-class nocompress(object):
-    def compress(self, x):
-        return x
-    def flush(self):
-        return ""
-
 bundletypes = {
-    "": ("", nocompress), # only when using unbundle on ssh and old http servers
+    "": ("", 'UN'),       # only when using unbundle on ssh and old http servers
                           # since the unification ssh accepts a header but there
                           # is no capability signaling it.
     "HG20": (), # special-cased below
-    "HG10UN": ("HG10UN", nocompress),
-    "HG10BZ": ("HG10", lambda: bz2.BZ2Compressor()),
-    "HG10GZ": ("HG10GZ", lambda: zlib.compressobj()),
+    "HG10UN": ("HG10UN", 'UN'),
+    "HG10BZ": ("HG10", 'BZ'),
+    "HG10GZ": ("HG10GZ", 'GZ'),
 }
 
 # hgweb uses this list to communicate its preferred type
 bundlepriority = ['HG10GZ', 'HG10BZ', 'HG10UN']
 
@@ -125,19 +117,22 @@  def writebundle(ui, cg, filename, bundle
         if bundletype == "HG20":
             from . import bundle2
             bundle = bundle2.bundle20(ui)
             part = bundle.newpart('changegroup', data=cg.getchunks())
             part.addparam('version', cg.version)
-            z = nocompress()
+            z = util.compressors['UN']()
             chunkiter = bundle.getchunks()
         else:
             if cg.version != '01':
                 raise util.Abort(_('old bundle types only supports v1 '
                                    'changegroups'))
-            header, compressor = bundletypes[bundletype]
+            header, comp = bundletypes[bundletype]
             fh.write(header)
-            z = compressor()
+            if comp not in util.compressors:
+                raise util.Abort(_('unknown stream compression type: %s')
+                                 % comp)
+            z = util.compressors[comp]()
             chunkiter = cg.getchunks()
 
         # parse the changegroup data, otherwise we will block
         # in case of sshrepo because we don't know the end of the stream
 
@@ -156,34 +151,19 @@  def writebundle(ui, cg, filename, bundle
             if filename and vfs:
                 vfs.unlink(cleanup)
             else:
                 os.unlink(cleanup)
 
-def decompressor(fh, alg):
-    if alg == 'UN':
-        return fh
-    elif alg == 'GZ':
-        def generator(f):
-            zd = zlib.decompressobj()
-            for chunk in util.filechunkiter(f):
-                yield zd.decompress(chunk)
-    elif alg == 'BZ':
-        def generator(f):
-            zd = bz2.BZ2Decompressor()
-            zd.decompress("BZ")
-            for chunk in util.filechunkiter(f, 4096):
-                yield zd.decompress(chunk)
-    else:
-        raise util.Abort("unknown bundle compression '%s'" % alg)
-    return util.chunkbuffer(generator(fh))
-
 class cg1unpacker(object):
     deltaheader = _CHANGEGROUPV1_DELTA_HEADER
     deltaheadersize = struct.calcsize(deltaheader)
     version = '01'
     def __init__(self, fh, alg):
-        self._stream = decompressor(fh, alg)
+        if not alg in util.decompressors:
+            raise util.Abort(_('unknown stream compression type: %s')
+                             % alg)
+        self._stream = util.decompressors[alg](fh)
         self._type = alg
         self.callback = None
     def compressed(self):
         return self._type != 'UN'
     def read(self, l):
diff --git a/mercurial/util.py b/mercurial/util.py
--- a/mercurial/util.py
+++ b/mercurial/util.py
@@ -19,10 +19,12 @@  import error, osutil, encoding, parsers
 import errno, shutil, sys, tempfile, traceback
 import re as remod
 import os, time, datetime, calendar, textwrap, signal, collections
 import imp, socket, urllib
 import gc
+import bz2
+import zlib
 
 if os.name == 'nt':
     import windows as platform
 else:
     import posix as platform
@@ -2336,7 +2338,43 @@  def finddirs(path):
     pos = path.rfind('/')
     while pos != -1:
         yield path[:pos]
         pos = path.rfind('/', 0, pos)
 
+# compression utility
+
+class nocompress(object):
+    def compress(self, x):
+        return x
+    def flush(self):
+        return ""
+
+compressors = {
+    'UN': nocompress,
+    # lambda to prevent early import
+    'BZ': lambda: bz2.BZ2Compressor(),
+    'GZ': lambda: zlib.compressobj(),
+    }
+
+def _makedecompressor(decompcls):
+    def generator(f):
+        d = decompcls()
+        for chunk in filechunkiter(f):
+            yield d.decompress(chunk)
+    def func(fh):
+        return chunkbuffer(generator(fh))
+    return func
+
+def _bz2():
+    d = bz2.BZ2Decompressor()
+    # Bzip2 streams start with 'BZ', but we stripped it.
+    # We put it back for good measure.
+    d.decompress('BZ')
+    return d
+
+decompressors = {'UN': lambda fh: fh,
+                 'BZ': _makedecompressor(_bz2),
+                 'GZ': _makedecompressor(lambda: zlib.decompressobj()),
+                 }
+
 # convenient shortcut
 dst = debugstacktrace
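
To round out the picture, a similar standalone sketch of the decompressor
side (again Python 2, illustrative only). StringIO and fixed 4096-byte reads
stand in for Mercurial's real bundle file handle, util.filechunkiter and
util.chunkbuffer, which are assumed rather than reimplemented here:

    import bz2
    import zlib
    from StringIO import StringIO

    def _makedecompressor(decompcls):
        # pair a decompressor factory with a chunked generator,
        # mirroring util._makedecompressor in the patch
        def generator(f):
            d = decompcls()
            chunk = f.read(4096)
            while chunk:
                yield d.decompress(chunk)
                chunk = f.read(4096)
        def func(fh):
            return generator(fh)
        return func

    decompressors = {
        'UN': lambda fh: fh,
        'BZ': _makedecompressor(bz2.BZ2Decompressor),
        'GZ': _makedecompressor(zlib.decompressobj),
    }
    # (the patch's 'BZ' entry also re-feeds the stripped 'BZ' header;
    # omitted here since nothing was stripped)

    # round trip through the 'GZ' engine
    z = zlib.compressobj()
    payload = z.compress('hello changegroup') + z.flush()
    print ''.join(decompressors['GZ'](StringIO(payload)))  # hello changegroup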