Patchwork D10656: revlog: compress sidedata when doing "post-pull" sidedata update

login
register
mail settings
Submitter phabricator
Date May 4, 2021, 2:20 p.m.
Message ID <differential-rev-PHID-DREV-ke45iqyxlq4efhnx4a7u-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/48976/
State Superseded
Headers show

Comments

phabricator - May 4, 2021, 2:20 p.m.
marmoute created this revision.
Herald added a reviewer: indygreg.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  All code paths that write sidedata now use compression (when appropriate).

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D10656

AFFECTED FILES
  mercurial/cext/revlog.c
  mercurial/pure/parsers.py
  mercurial/revlog.py

CHANGE DETAILS




To: marmoute, indygreg, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -3380,6 +3380,26 @@ 
                 serialized_sidedata = sidedatautil.serialize_sidedata(
                     new_sidedata
                 )
+
+                sidedata_compression_mode = COMP_MODE_INLINE
+                if serialized_sidedata and self.hassidedata:
+                    sidedata_compression_mode = COMP_MODE_PLAIN
+                    h, comp_sidedata = self.compress(serialized_sidedata)
+                    if (
+                        h != b'u'
+                        and comp_sidedata[0] != b'\0'
+                        and len(comp_sidedata) < len(serialized_sidedata)
+                    ):
+                        assert not h
+                        if (
+                            comp_sidedata[0]
+                            == self._docket.default_compression_header
+                        ):
+                            sidedata_compression_mode = COMP_MODE_DEFAULT
+                            serialized_sidedata = comp_sidedata
+                        else:
+                            sidedata_compression_mode = COMP_MODE_INLINE
+                            serialized_sidedata = comp_sidedata
                 if entry[8] != 0 or entry[9] != 0:
                     # rewriting entries that already have sidedata is not
                     # supported yet, because it introduces garbage data in the
@@ -3394,6 +3414,7 @@ 
                     current_offset,
                     len(serialized_sidedata),
                     new_offset_flags,
+                    sidedata_compression_mode,
                 )
 
                 # the sidedata computation might have move the file cursors around
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -297,7 +297,12 @@ 
     index_format = revlog_constants.INDEX_ENTRY_V2
 
     def replace_sidedata_info(
-        self, rev, sidedata_offset, sidedata_length, offset_flags
+        self,
+        rev,
+        sidedata_offset,
+        sidedata_length,
+        offset_flags,
+        compression_mode,
     ):
         """
         Replace an existing index entry's sidedata offset and length with new
@@ -316,6 +321,7 @@ 
             entry[0] = offset_flags
             entry[8] = sidedata_offset
             entry[9] = sidedata_length
+            entry[11] = compression_mode
             entry = tuple(entry)
             new = self._pack_entry(entry)
             self._extra[rev - self._lgt] = new
diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c
--- a/mercurial/cext/revlog.c
+++ b/mercurial/cext/revlog.c
@@ -533,12 +533,13 @@ 
 {
 	uint64_t offset_flags, sidedata_offset;
 	int rev;
+	char comp_mode;
 	Py_ssize_t sidedata_comp_len;
 	char *data;
 #if LONG_MAX == 0x7fffffffL
-	const char *const sidedata_format = PY23("nKiK", "nKiK");
+	const char *const sidedata_format = PY23("nKiKB", "nKiKB");
 #else
-	const char *const sidedata_format = PY23("nkik", "nkik");
+	const char *const sidedata_format = PY23("nkikB", "nkikB");
 #endif
 
 	if (self->entry_size == v1_entry_size || self->inlined) {
@@ -553,7 +554,7 @@ 
 	}
 
 	if (!PyArg_ParseTuple(args, sidedata_format, &rev, &sidedata_offset,
-	                      &sidedata_comp_len, &offset_flags))
+	                      &sidedata_comp_len, &offset_flags, &comp_mode))
 		return NULL;
 
 	if (rev < 0 || rev >= index_length(self)) {
@@ -573,6 +574,7 @@ 
 	putbe64(offset_flags, data);
 	putbe64(sidedata_offset, data + 64);
 	putbe32(sidedata_comp_len, data + 72);
+	data[76] = (data[76] & ~(3 << 2)) | ((comp_mode & 3) << 2);
 
 	Py_RETURN_NONE;
 }