Patchwork D10650: revlog: introduce a plain compression mode

login
register
mail settings
Submitter phabricator
Date May 4, 2021, 2:20 p.m.
Message ID <differential-rev-PHID-DREV-dyyjp6ynny5kexrrrg66-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/48968/
State Superseded
Headers show

Comments

phabricator - May 4, 2021, 2:20 p.m.
marmoute created this revision.
Herald added a reviewer: indygreg.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  That mode is simple: it means the chunk contains uncompressed data that can
  be used directly.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D10650

AFFECTED FILES
  mercurial/revlog.py
  mercurial/revlogutils/constants.py
  mercurial/revlogutils/deltas.py

CHANGE DETAILS




To: marmoute, indygreg, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/mercurial/revlogutils/deltas.py b/mercurial/revlogutils/deltas.py
--- a/mercurial/revlogutils/deltas.py
+++ b/mercurial/revlogutils/deltas.py
@@ -553,6 +553,24 @@ 
     snapshotdepth = attr.ib()
 
 
+def drop_u_compression(delta):
+    """turn a "u" (no-compression) delta into one stored without header
+
+    This is useful for revlog formats that can flag uncompressed data.
+    """
+    assert delta.data[0] == b'u', delta.data[0]
+    return _deltainfo(
+        delta.distance,
+        delta.deltalen - 1,
+        (b'', delta.data[1]),
+        delta.base,
+        delta.chainbase,
+        delta.chainlen,
+        delta.compresseddeltalen,
+        delta.snapshotdepth,
+    )
+
+
 def isgooddeltainfo(revlog, deltainfo, revinfo):
     """Returns True if the given delta is good. Good means that it is within
     the disk span, disk size, and chain length bounds that we know to be
diff --git a/mercurial/revlogutils/constants.py b/mercurial/revlogutils/constants.py
--- a/mercurial/revlogutils/constants.py
+++ b/mercurial/revlogutils/constants.py
@@ -118,6 +118,7 @@ 
 ## chunk compression mode constants:
 # chunk use a compression stored "inline" at the start of the chunk itself.
 COMP_MODE_INLINE = 2
+COMP_MODE_PLAIN = 0
 
 SUPPORTED_FLAGS = {
     REVLOGV0: REVLOGV0_FLAGS,
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -36,6 +36,7 @@ 
 from .revlogutils.constants import (
     ALL_KINDS,
     COMP_MODE_INLINE,
+    COMP_MODE_PLAIN,
     FEATURES_BY_VERSION,
     FLAG_GENERALDELTA,
     FLAG_INLINE_DATA,
@@ -1755,7 +1756,16 @@ 
 
         Returns a str holding uncompressed data for the requested revision.
         """
-        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
+        compression_mode = self.index[rev][10]
+        data = self._getsegmentforrevs(rev, rev, df=df)[1]
+        if compression_mode == COMP_MODE_PLAIN:
+            return data
+        elif compression_mode == COMP_MODE_INLINE:
+            return self.decompress(data)
+        else:
+            msg = 'unknown compression mode %d'
+            msg %= compression_mode
+            raise error.RevlogError(msg)
 
     def _chunks(self, revs, df=None, targetsize=None):
         """Obtain decompressed chunks for the specified revisions.
@@ -1808,8 +1818,16 @@ 
                 if inline:
                     chunkstart += (rev + 1) * iosize
                 chunklength = length(rev)
+                comp_mode = self.index[rev][10]
                 c = buffer(data, chunkstart - offset, chunklength)
-                ladd(decomp(c))
+                if comp_mode == COMP_MODE_PLAIN:
+                    ladd(c)
+                elif comp_mode == COMP_MODE_INLINE:
+                    ladd(decomp(c))
+                else:
+                    msg = 'unknown compression mode %d'
+                    msg %= comp_mode
+                    raise error.RevlogError(msg)
 
         return l
 
@@ -2459,6 +2477,20 @@ 
 
         deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
 
+        compression_mode = COMP_MODE_INLINE
+        if self._docket is not None:
+            h, d = deltainfo.data
+            if not h and not d:
+                # not data to store at all... declare them uncompressed
+                compression_mode = COMP_MODE_PLAIN
+            elif not h and d[0:1] == b'\0':
+                compression_mode = COMP_MODE_PLAIN
+            elif h == b'u':
+                # we have a more efficient way to declare uncompressed
+                h = b''
+                compression_mode = COMP_MODE_PLAIN
+                deltainfo = deltautil.drop_u_compression(deltainfo)
+
         if sidedata and self.hassidedata:
             serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
             sidedata_offset = offset + deltainfo.deltalen
@@ -2480,7 +2512,7 @@ 
             node,
             sidedata_offset,
             len(serialized_sidedata),
-            COMP_MODE_INLINE,
+            compression_mode,
         )
 
         self.index.append(e)