Patchwork D10646: revlog: add a "data compression mode" entry in the index tuple

login
register
mail settings
Submitter phabricator
Date May 4, 2021, 2:20 p.m.
Message ID <differential-rev-PHID-DREV-u225hxivn6rgyb3kacfc-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/48969/
State Superseded
Headers show

Comments

phabricator - May 4, 2021, 2:20 p.m.
marmoute created this revision.
Herald added a reviewer: indygreg.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  That will make it possible to keep track of compression information in the
  revlog index, opening the way to more efficient revision restoration (in native
  code, but the python usage is already defeating performance work).
  
  We start with adding a new entry to the index tuple, using a value matching the
  current behavior. We will introduce storage and other values in later changesets.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D10646

AFFECTED FILES
  mercurial/bundlerepo.py
  mercurial/cext/parsers.c
  mercurial/cext/revlog.c
  mercurial/policy.py
  mercurial/pure/parsers.py
  mercurial/revlog.py
  mercurial/revlogutils/constants.py
  mercurial/revlogutils/revlogv0.py
  mercurial/unionrepo.py
  tests/test-parseindex2.py

CHANGE DETAILS




To: marmoute, indygreg, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/tests/test-parseindex2.py b/tests/test-parseindex2.py
--- a/tests/test-parseindex2.py
+++ b/tests/test-parseindex2.py
@@ -21,6 +21,9 @@ 
     policy,
     pycompat,
 )
+from mercurial.revlogutils import (
+    constants,
+)
 
 parsers = policy.importmod('parsers')
 
@@ -49,7 +52,7 @@ 
         cache = (0, data)
         while off <= l:
             e = struct.unpack(indexformatng, data[off : off + s])
-            e = e + (0, 0)
+            e = e + (0, 0, constants.COMP_MODE_INLINE)
             nodemap[e[7]] = n
             append(e)
             n += 1
@@ -59,7 +62,7 @@ 
     else:
         while off <= l:
             e = struct.unpack(indexformatng, data[off : off + s])
-            e = e + (0, 0)
+            e = e + (0, 0, constants.COMP_MODE_INLINE)
             nodemap[e[7]] = n
             append(e)
             n += 1
@@ -242,7 +245,19 @@ 
                 break
 
     def testminusone(self):
-        want = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid, 0, 0)
+        want = (
+            0,
+            0,
+            0,
+            -1,
+            -1,
+            -1,
+            -1,
+            sha1nodeconstants.nullid,
+            0,
+            0,
+            constants.COMP_MODE_INLINE,
+        )
         index, junk = parsers.parse_index2(data_inlined, True)
         got = index[-1]
         self.assertEqual(want, got)  # inline data
@@ -264,7 +279,20 @@ 
             # node won't matter for this test, let's just make sure
             # they don't collide. Other data don't matter either.
             node = hexrev(p1) + hexrev(p2) + b'.' * 12
-            index.append((0, 0, 12, 1, 34, p1, p2, node, 0, 0))
+            e = (
+                0,
+                0,
+                12,
+                1,
+                34,
+                p1,
+                p2,
+                node,
+                0,
+                0,
+                constants.COMP_MODE_INLINE,
+            )
+            index.append(e)
 
         appendrev(4)
         appendrev(5)
diff --git a/mercurial/unionrepo.py b/mercurial/unionrepo.py
--- a/mercurial/unionrepo.py
+++ b/mercurial/unionrepo.py
@@ -31,6 +31,10 @@ 
     vfs as vfsmod,
 )
 
+from .revlogutils import (
+    constants as revlog_constants,
+)
+
 
 class unionrevlog(revlog.revlog):
     def __init__(self, opener, radix, revlog2, linkmapper):
@@ -65,6 +69,7 @@ 
                 node,
                 _sdo,
                 _sds,
+                _dcm,
             ) = rev
             flags = _start & 0xFFFF
 
@@ -99,6 +104,7 @@ 
                 node,
                 0,  # sidedata offset
                 0,  # sidedata size
+                revlog_constants.COMP_MODE_INLINE,
             )
             self.index.append(e)
             self.bundlerevs.add(n)
diff --git a/mercurial/revlogutils/revlogv0.py b/mercurial/revlogutils/revlogv0.py
--- a/mercurial/revlogutils/revlogv0.py
+++ b/mercurial/revlogutils/revlogv0.py
@@ -9,6 +9,7 @@ 
 
 from ..node import sha1nodeconstants
 from .constants import (
+    COMP_MODE_INLINE,
     INDEX_ENTRY_V0,
 )
 from ..i18n import _
@@ -42,7 +43,19 @@ 
 
 class revlogoldindex(list):
     entry_size = INDEX_ENTRY_V0.size
-    null_item = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid, 0, 0)
+    null_item = (
+        0,
+        0,
+        0,
+        -1,
+        -1,
+        -1,
+        -1,
+        sha1nodeconstants.nullid,
+        0,
+        0,
+        COMP_MODE_INLINE,
+    )
 
     @property
     def nodemap(self):
@@ -138,6 +151,7 @@ 
             e[6],
             0,  # no side data support
             0,  # no side data support
+            COMP_MODE_INLINE,
         )
         index.append(e2)
         nodemap[e[6]] = n
diff --git a/mercurial/revlogutils/constants.py b/mercurial/revlogutils/constants.py
--- a/mercurial/revlogutils/constants.py
+++ b/mercurial/revlogutils/constants.py
@@ -114,6 +114,10 @@ 
 # bitmark for flags that could cause rawdata content change
 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
 
+## chunk compression mode constants:
+# chunk uses a compression stored "inline" at the start of the chunk itself.
+COMP_MODE_INLINE = 2
+
 SUPPORTED_FLAGS = {
     REVLOGV0: REVLOGV0_FLAGS,
     REVLOGV1: REVLOGV1_FLAGS,
@@ -152,4 +156,5 @@ 
     },
 }
 
+
 SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -35,6 +35,7 @@ 
 from .pycompat import getattr
 from .revlogutils.constants import (
     ALL_KINDS,
+    COMP_MODE_INLINE,
     FEATURES_BY_VERSION,
     FLAG_GENERALDELTA,
     FLAG_INLINE_DATA,
@@ -336,6 +337,10 @@ 
 
     [9] sidedata chunk length:
             The size, in bytes, of the revision side-data chunk.
+
+    [10] data compression mode:
+            two bits that detail the way the data chunk is compressed on disk.
+            (see "COMP_MODE_*" constant for details)
     """
 
     _flagserrorclass = error.RevlogError
@@ -2474,6 +2479,7 @@ 
             node,
             sidedata_offset,
             len(serialized_sidedata),
+            COMP_MODE_INLINE,
         )
 
         self.index.append(e)
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -54,7 +54,19 @@ 
     # Size of a C long int, platform independent
     int_size = struct.calcsize(b'>i')
     # An empty index entry, used as a default value to be overridden, or nullrev
-    null_item = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid, 0, 0)
+    null_item = (
+        0,
+        0,
+        0,
+        -1,
+        -1,
+        -1,
+        -1,
+        sha1nodeconstants.nullid,
+        0,
+        0,
+        revlog_constants.COMP_MODE_INLINE,
+    )
 
     @util.propertycache
     def entry_size(self):
@@ -135,7 +147,7 @@ 
 
     def _unpack_entry(self, data):
         r = self.index_format.unpack(data)
-        r = r + (0, 0)
+        r = r + (0, 0, revlog_constants.COMP_MODE_INLINE)
         return r
 
     def pack_header(self, header):
@@ -303,16 +315,17 @@ 
             self._extra[rev - self._lgt] = new
 
     def _unpack_entry(self, data):
-        return self.index_format.unpack(data)
+        return self.index_format.unpack(data) + (
+            revlog_constants.COMP_MODE_INLINE,
+        )
 
     def _pack_entry(self, entry):
-        return self.index_format.pack(*entry)
+        return self.index_format.pack(*entry[:10])
 
     def entry_binary(self, rev):
         """return the raw binary string representing a revision"""
         entry = self[rev]
-        p = revlog_constants.INDEX_ENTRY_V2.pack(*entry)
-        return p
+        return self._pack_entry(entry)
 
     def pack_header(self, header):
         """pack header information as binary"""
diff --git a/mercurial/policy.py b/mercurial/policy.py
--- a/mercurial/policy.py
+++ b/mercurial/policy.py
@@ -80,7 +80,7 @@ 
     ('cext', 'bdiff'): 3,
     ('cext', 'mpatch'): 1,
     ('cext', 'osutil'): 4,
-    ('cext', 'parsers'): 18,
+    ('cext', 'parsers'): 19,
 }
 
 # map import request to other package or module
diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c
--- a/mercurial/cext/revlog.c
+++ b/mercurial/cext/revlog.c
@@ -118,9 +118,9 @@ 
 static int index_find_node(indexObject *self, const char *node);
 
 #if LONG_MAX == 0x7fffffffL
-static const char *const tuple_format = PY23("Kiiiiiis#Ki", "Kiiiiiiy#Ki");
+static const char *const tuple_format = PY23("Kiiiiiis#KiB", "Kiiiiiiy#KiB");
 #else
-static const char *const tuple_format = PY23("kiiiiiis#ki", "kiiiiiiy#ki");
+static const char *const tuple_format = PY23("kiiiiiis#kiB", "kiiiiiiy#kiB");
 #endif
 
 /* A RevlogNG v1 index entry is 64 bytes long. */
@@ -132,6 +132,8 @@ 
 static const long format_v1 = 1; /* Internal only, could be any number */
 static const long format_v2 = 2; /* Internal only, could be any number */
 
+static const char comp_mode_inline = 2;
+
 static void raise_revlog_error(void)
 {
 	PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
@@ -294,6 +296,7 @@ 
 	uint64_t offset_flags, sidedata_offset;
 	int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
 	    sidedata_comp_len;
+	char data_comp_mode;
 	const char *c_node_id;
 	const char *data;
 	Py_ssize_t length = index_length(self);
@@ -340,9 +343,11 @@ 
 		sidedata_comp_len = getbe32(data + 72);
 	}
 
+	data_comp_mode = comp_mode_inline;
 	return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
 	                     base_rev, link_rev, parent_1, parent_2, c_node_id,
-	                     self->nodelen, sidedata_offset, sidedata_comp_len);
+	                     self->nodelen, sidedata_offset, sidedata_comp_len,
+	                     data_comp_mode);
 }
 /*
  * Pack header information in binary
@@ -443,6 +448,7 @@ 
 {
 	uint64_t offset_flags, sidedata_offset;
 	int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
+	char data_comp_mode;
 	Py_ssize_t c_node_id_len, sidedata_comp_len;
 	const char *c_node_id;
 	char *data;
@@ -450,8 +456,9 @@ 
 	if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len,
 	                      &uncomp_len, &base_rev, &link_rev, &parent_1,
 	                      &parent_2, &c_node_id, &c_node_id_len,
-	                      &sidedata_offset, &sidedata_comp_len)) {
-		PyErr_SetString(PyExc_TypeError, "10-tuple required");
+	                      &sidedata_offset, &sidedata_comp_len,
+	                      &data_comp_mode)) {
+		PyErr_SetString(PyExc_TypeError, "11-tuple required");
 		return NULL;
 	}
 
@@ -459,6 +466,12 @@ 
 		PyErr_SetString(PyExc_TypeError, "invalid node");
 		return NULL;
 	}
+	if (data_comp_mode != comp_mode_inline) {
+		PyErr_Format(PyExc_ValueError,
+		             "invalid data compression mode: %i",
+		             data_comp_mode);
+		return NULL;
+	}
 
 	if (self->new_length == self->added_length) {
 		size_t new_added_length =
@@ -2761,9 +2774,9 @@ 
 		self->entry_size = v1_entry_size;
 	}
 
-	self->nullentry =
-	    Py_BuildValue(PY23("iiiiiiis#ii", "iiiiiiiy#ii"), 0, 0, 0, -1, -1,
-	                  -1, -1, nullid, self->nodelen, 0, 0);
+	self->nullentry = Py_BuildValue(PY23("iiiiiiis#iiB", "iiiiiiiy#iiB"), 0,
+	                                0, 0, -1, -1, -1, -1, nullid,
+	                                self->nodelen, 0, 0, comp_mode_inline);
 
 	if (!self->nullentry)
 		return -1;
diff --git a/mercurial/cext/parsers.c b/mercurial/cext/parsers.c
--- a/mercurial/cext/parsers.c
+++ b/mercurial/cext/parsers.c
@@ -668,7 +668,7 @@ 
 void manifest_module_init(PyObject *mod);
 void revlog_module_init(PyObject *mod);
 
-static const int version = 18;
+static const int version = 19;
 
 static void module_init(PyObject *mod)
 {
diff --git a/mercurial/bundlerepo.py b/mercurial/bundlerepo.py
--- a/mercurial/bundlerepo.py
+++ b/mercurial/bundlerepo.py
@@ -105,6 +105,7 @@ 
                 node,
                 0,
                 0,
+                revlog_constants.COMP_MODE_INLINE,
             )
             self.index.append(e)
             self.bundlerevs.add(n)