Patchwork D10652: revlog: implement a "default compression" mode

login
register
mail settings
Submitter phabricator
Date May 4, 2021, 2:20 p.m.
Message ID <differential-rev-PHID-DREV-b3lyzdfcarnu35w7fngl-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/48971/
State Superseded
Headers show

Comments

phabricator - May 4, 2021, 2:20 p.m.
marmoute created this revision.
Herald added a reviewer: indygreg.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  The revlog docket now stores a default compression engine. When a chunk uses
  that compression, a dedicated mode is recorded in the revlog entry so that we
  can route it directly to the right decompressor.
  
  We should probably make PLAIN and DEFAULT the only available modes for
  revlogv2, but this is something for later.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D10652

AFFECTED FILES
  mercurial/revlog.py
  mercurial/revlogutils/constants.py
  mercurial/revlogutils/docket.py

CHANGE DETAILS




To: marmoute, indygreg, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/mercurial/revlogutils/docket.py b/mercurial/revlogutils/docket.py
--- a/mercurial/revlogutils/docket.py
+++ b/mercurial/revlogutils/docket.py
@@ -21,6 +21,7 @@ 
 
 from .. import (
     error,
+    util,
 )
 
 from . import (
@@ -36,7 +37,8 @@ 
 # * 8 bytes: pending size of index-data
 # * 8 bytes: size of data
 # * 8 bytes: pending size of data
-S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'LLLL')
+# * 1 bytes: default compression header
+S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'LLLLc')
 
 
 class RevlogDocket(object):
@@ -51,6 +53,7 @@ 
         pending_index_end=0,
         data_end=0,
         pending_data_end=0,
+        default_compression_header=None,
     ):
         self._version_header = version_header
         self._read_only = bool(use_pending)
@@ -71,6 +74,7 @@ 
         else:
             self._index_end = self._initial_index_end
             self._data_end = self._initial_data_end
+        self.default_compression_header = default_compression_header
 
     def index_filepath(self):
         """file path to the current index file associated to this docket"""
@@ -134,6 +138,7 @@ 
             self._index_end,
             official_data_end,
             self._data_end,
+            self.default_compression_header,
         )
         return S_HEADER.pack(*data)
 
@@ -142,7 +147,12 @@ 
     """given a revlog version a new docket object for the given revlog"""
     if (version_header & 0xFFFF) != constants.REVLOGV2:
         return None
-    docket = RevlogDocket(revlog, version_header=version_header)
+    comp = util.compengines[revlog._compengine].revlogheader()
+    docket = RevlogDocket(
+        revlog,
+        version_header=version_header,
+        default_compression_header=comp,
+    )
     docket._dirty = True
     return docket
 
@@ -155,6 +165,7 @@ 
     pending_index_size = header[2]
     data_size = header[3]
     pending_data_size = header[4]
+    default_compression_header = header[5]
     docket = RevlogDocket(
         revlog,
         use_pending=use_pending,
@@ -163,5 +174,6 @@ 
         pending_index_end=pending_index_size,
         data_end=data_size,
         pending_data_end=pending_data_size,
+        default_compression_header=default_compression_header,
     )
     return docket
diff --git a/mercurial/revlogutils/constants.py b/mercurial/revlogutils/constants.py
--- a/mercurial/revlogutils/constants.py
+++ b/mercurial/revlogutils/constants.py
@@ -117,6 +117,7 @@ 
 
 ## chunk compression mode constants:
 # chunk use a compression stored "inline" at the start of the chunk itself.
+COMP_MODE_DEFAULT = 1
 COMP_MODE_INLINE = 2
 COMP_MODE_PLAIN = 0
 
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -35,6 +35,7 @@ 
 from .pycompat import getattr
 from .revlogutils.constants import (
     ALL_KINDS,
+    COMP_MODE_DEFAULT,
     COMP_MODE_INLINE,
     COMP_MODE_PLAIN,
     FEATURES_BY_VERSION,
@@ -706,6 +707,15 @@ 
         engine = util.compengines[self._compengine]
         return engine.revlogcompressor(self._compengineopts)
 
+    @util.propertycache
+    def _decompressor(self):
+        """the default decompressor"""
+        if self._docket is None:
+            return None
+        t = self._docket.default_compression_header
+        c = self._get_decompressor(t)
+        return c.decompress
+
     def _indexfp(self):
         """file object for the revlog's index file"""
         return self.opener(self._indexfile, mode=b"r")
@@ -1774,6 +1784,8 @@ 
         data = self._getsegmentforrevs(rev, rev, df=df)[1]
         if compression_mode == COMP_MODE_PLAIN:
             return data
+        elif compression_mode == COMP_MODE_DEFAULT:
+            return self._decompressor(data)
         elif compression_mode == COMP_MODE_INLINE:
             return self.decompress(data)
         else:
@@ -1827,6 +1839,8 @@ 
                 return [self._chunk(rev, df=df) for rev in revschunk]
 
             decomp = self.decompress
+            # self._decompressor might be None, but will not be used in that case
+            def_decomp = self._decompressor
             for rev in revschunk:
                 chunkstart = start(rev)
                 if inline:
@@ -1838,6 +1852,8 @@ 
                     ladd(c)
                 elif comp_mode == COMP_MODE_INLINE:
                     ladd(decomp(c))
+                elif comp_mode == COMP_MODE_DEFAULT:
+                    ladd(def_decomp(c))
                 else:
                     msg = 'unknown compression mode %d'
                     msg %= comp_mode
@@ -2487,8 +2503,12 @@ 
             if not h and not d:
                 # not data to store at all... declare them uncompressed
                 compression_mode = COMP_MODE_PLAIN
-            elif not h and d[0:1] == b'\0':
-                compression_mode = COMP_MODE_PLAIN
+            elif not h:
+                t = d[0:1]
+                if t == b'\0':
+                    compression_mode = COMP_MODE_PLAIN
+                elif t == self._docket.default_compression_header:
+                    compression_mode = COMP_MODE_DEFAULT
             elif h == b'u':
                 # we have a more efficient way to declare uncompressed
                 h = b''