From patchwork Sat Apr 1 22:31:56 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [5, of, 9] util: validate and extract compression-related bundlespec parameters From: Gregory Szorc X-Patchwork-Id: 19897 Message-Id: <62e377f673f3d9e10701.1491085916@ubuntu-vm-main> To: mercurial-devel@mercurial-scm.org Date: Sat, 01 Apr 2017 15:31:56 -0700 # HG changeset patch # User Gregory Szorc # Date 1491079470 25200 # Sat Apr 01 13:44:30 2017 -0700 # Node ID 62e377f673f3d9e10701d373b82f995085b54363 # Parent 5cc2f25b803a0184fdc4e67142b65df752e40284 util: validate and extract compression-related bundlespec parameters There is currently an experimental config option used by `hg bundle` to declare the compression level to be used by the compression engine. I implemented this just before the 4.1 freeze so there would be a way to adjust the zstd compression level. (Because zstd is highly flexible and one may want to optimize for extreme speed or extreme compression.) My intent was always to formalize compression engine settings later. Now that we have formalized the *bundlespec* string and exposed it to users via documentation, we can start leaning on it - and its extensibility via parameters - to declare compression options. This patch introduces a compression engine method for validating and extracting relevant bundlespec parameters used by the compression engine. This allows each compression engine to define its own set of tunable settings. For the gzip and zstd engines, we have defined a method that turns the "compressionlevel" parameter into a "level" option for compression. I have plans to add more parameters to the zstd engine. But I'll add those in a follow-up. To prove this works, we call the new API from bundlespec parsing and add tests demonstrating failures. The added API returns a set of parameters relevant to the engine. While unused, my eventual intent is for bundlespec parsing to know when there are "unknown" parameters so it can warn about their presence. Since a compression engine could consume *any* parameter, it needs to tell the bundlespec parser which parameters are relevant to it. While this is a YAGNI violation, it will prevent future BC in the compression engine API. diff --git a/mercurial/exchange.py b/mercurial/exchange.py --- a/mercurial/exchange.py +++ b/mercurial/exchange.py @@ -163,8 +163,15 @@ def parsebundlespec(repo, spec, strict=T _('missing support for repository features: %s') % ', '.join(sorted(missingreqs))) + engine = util.compengines.forbundlename(compression) + + # Verify compression-related parameters with compression engine. + try: + engine.parsebundlespecparams(params) + except Exception as e: + raise error.UnsupportedBundleSpecification(str(e)) + if not externalnames: - engine = util.compengines.forbundlename(compression) compression = engine.bundletype()[1] version = _bundlespeccgversions[version] return compression, version, params diff --git a/mercurial/help/bundlespec.txt b/mercurial/help/bundlespec.txt --- a/mercurial/help/bundlespec.txt +++ b/mercurial/help/bundlespec.txt @@ -82,3 +82,7 @@ Examples ``zstd-v1`` This errors because ``zstd`` is not supported for ``v1`` types. + +``zstd-v2;compressionlevel=11`` + Produce a ``v2`` bundle with zstandard compression using compression level + ``11`` (which will take longer to produce a smaller bundle). diff --git a/mercurial/util.py b/mercurial/util.py --- a/mercurial/util.py +++ b/mercurial/util.py @@ -3276,6 +3276,20 @@ class compressionengine(object): """ return None + def parsebundlespecparams(self, params): + """Parse *bundlespec* parameters to a dict of options. + + The *bundlespec* string may define key-value parameters. This method + is used to validate and extract parameters relevant to compression. + + Returns a 2-tuple of a set of parameter names that are relevant to this + engine and a dict of options that will possibly be fed as the ``opts`` + argument into a subsequent compression operation. + + Exceptions can be raised to indicate an invalid parameter value. + """ + return set(), {} + def wireprotosupport(self): """Declare support for this compression format on the wire protocol. @@ -3362,9 +3376,33 @@ class _zlibengine(compressionengine): All Mercurial clients should support this format. The compression algorithm strikes a reasonable balance between compression ratio and size. + + The ``compressionlevel`` parameter defines the integer compression + level to use. The default (``-1``) is likely equivalent to ``6``. + ``0`` means no compression. ``9`` means maximum compression. """ return 'gzip', 'GZ' + def parsebundlespecparams(self, params): + relevant = set() + opts = {} + + if 'compressionlevel' in params: + relevant.add('compressionlevel') + try: + level = int(params['compressionlevel']) + except ValueError: + raise ValueError(_('compressionlevel "%s" is not an integer') % + params['compressionlevel']) + + if level < -1 or level > 9: + raise ValueError(_('zlib compression level must be between -1 ' + 'and 9')) + + opts['level'] = level + + return relevant, opts + def wireprotosupport(self): return compewireprotosupport('zlib', 20, 20) @@ -3568,9 +3606,40 @@ class _zstdengine(compressionengine): If this engine is available and backwards compatibility is not a concern, it is likely the best available engine. + + The ``compressionlevel`` parameter defines the integer compression + level to use. The default (``-1``) is equivalent to ``3``, which + should deliver better-than-gzip compression ratios while being + faster. Compression levels over ``10`` can yield significantly + smaller bundles at a cost of much slower execution. Higher compression + levels also require more memory for both compression and + decompression. Compression levels higher than ``19`` can require + hundreds of megabytes of memory and may exhaust memory in 32-bit + processes. """ return 'zstd', 'ZS' + def parsebundlespecparams(self, params): + relevant = set() + opts = {} + + if 'compressionlevel' in params: + relevant.add('compressionlevel') + try: + level = int(params['compressionlevel']) + except ValueError: + raise ValueError(_('compressionlevel "%s" is not an integer') % + params['compressionlevel']) + + max_level = self._module.MAX_COMPRESSION_LEVEL + if level < -1 or level > max_level: + raise ValueError(_('zstd compression level must be between -1 ' + 'and %d') % max_level) + + opts['level'] = level + + return relevant, opts + def wireprotosupport(self): return compewireprotosupport('zstd', 50, 50) diff --git a/tests/test-bundle-type.t b/tests/test-bundle-type.t --- a/tests/test-bundle-type.t +++ b/tests/test-bundle-type.t @@ -48,6 +48,31 @@ Unknown compression type is rejected (see 'hg help bundlespec' for supported values for --type) [255] +Invalid compression levels are rejected + + $ hg bundle -a -t 'gzip-v2;compressionlevel=foo' out.hg + abort: compressionlevel "foo" is not an integer + (see 'hg help bundlespec' for supported values for --type) + [255] + + $ hg bundle -a -t 'gzip-v2;compressionlevel=10' out.hg + abort: zlib compression level must be between -1 and 9 + (see 'hg help bundlespec' for supported values for --type) + [255] + +#if zstd + $ hg bundle -a -t 'zstd-v2;compressionlevel=foo' out.hg + abort: compressionlevel "foo" is not an integer + (see 'hg help bundlespec' for supported values for --type) + [255] + + $ hg bundle -a -t 'zstd-v2;compressionlevel=42' out.hg + abort: zstd compression level must be between -1 and 22 + (see 'hg help bundlespec' for supported values for --type) + [255] + +#endif + $ cd .. test bundle types diff --git a/tests/test-help.t b/tests/test-help.t --- a/tests/test-help.t +++ b/tests/test-help.t @@ -1849,6 +1849,7 @@ Compression engines listed in `hg help b This engine will likely produce smaller bundles than "gzip" but will be "gzip" better compression than "gzip". It also frequently yields better + better-than-gzip compression ratios while being faster. Compression Test usage of section marks in help documents