Patchwork D10510: revlog: have an explicit "pack_header" method

login
register
mail settings
Submitter phabricator
Date April 28, 2021, 3:08 p.m.
Message ID <differential-rev-PHID-DREV-sqm5x5qw2liqzbzihwwk-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/48829/
State Superseded
Headers show

Comments

phabricator - April 28, 2021, 3:08 p.m.
marmoute created this revision.
Herald added a reviewer: indygreg.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  Having to pass the version header when retrieving the binary version of every single entry is a bit silly, so we extract that special logic into its own method. This also prepares the move to a newer revlog format that does not store the header within an actual entry…

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D10510

AFFECTED FILES
  mercurial/cext/revlog.c
  mercurial/pure/parsers.py
  mercurial/revlog.py
  rust/hg-cpython/src/revlog.rs

CHANGE DETAILS




To: marmoute, indygreg, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/rust/hg-cpython/src/revlog.rs b/rust/hg-cpython/src/revlog.rs
--- a/rust/hg-cpython/src/revlog.rs
+++ b/rust/hg-cpython/src/revlog.rs
@@ -177,6 +177,11 @@ 
         self.call_cindex(py, "entry_binary", args, kw)
     }
 
+    /// return a binary packed version of the header
+    def pack_header(&self, *args, **kw) -> PyResult<PyObject> {
+        self.call_cindex(py, "pack_header", args, kw)
+    }
+
     /// get an index entry
     def get(&self, *args, **kw) -> PyResult<PyObject> {
         self.call_cindex(py, "get", args, kw)
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -266,7 +266,7 @@ 
             return (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid)
         return list.__getitem__(self, i)
 
-    def entry_binary(self, rev, header):
+    def entry_binary(self, rev):
         """return the raw binary string representing a revision"""
         entry = self[rev]
         if gettype(entry[0]):
@@ -284,6 +284,10 @@ 
         )
         return INDEX_ENTRY_V0.pack(*e2)
 
+    def pack_header(self, header):
+        """Pack header information in binary"""
+        return b''
+
 
 def parse_index_v0(data, inline):
     s = INDEX_ENTRY_V0.size
@@ -2041,7 +2045,10 @@ 
             self.version &= ~FLAG_INLINE_DATA
             self._inline = False
             for i in self:
-                e = self.index.entry_binary(i, self.version)
+                e = self.index.entry_binary(i)
+                if i == 0:
+                    header = self.index.pack_header(self.version)
+                    e = header + e
                 fp.write(e)
 
             # the temp file replace the real index when we exit the context
@@ -2363,7 +2370,10 @@ 
             e = e[:8]
 
         self.index.append(e)
-        entry = self.index.entry_binary(curr, self.version)
+        entry = self.index.entry_binary(curr)
+        if curr == 0:
+            header = self.index.pack_header(self.version)
+            entry = header + entry
         self._writeentry(
             transaction,
             ifh,
@@ -3216,5 +3226,8 @@ 
             for i, entry in enumerate(new_entries):
                 rev = startrev + i
                 self.index.replace_sidedata_info(rev, entry[8], entry[9])
-                packed = self.index.entry_binary(rev, self.version)
+                packed = self.index.entry_binary(rev)
+                if rev == 0:
+                    header = self.index.pack_header(self.version)
+                    packed = header + packed
                 fp.write(packed)
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -127,14 +127,17 @@ 
             r = (offset_type(0, gettype(r[0])),) + r[1:]
         return r
 
-    def entry_binary(self, rev, header):
+    def pack_header(self, header):
+        """pack header information as binary"""
+        v_fmt = revlog_constants.INDEX_HEADER
+        return v_fmt.pack(header)
+
+    def entry_binary(self, rev):
         """return the raw binary string representing a revision"""
         entry = self[rev]
         p = revlog_constants.INDEX_ENTRY_V1.pack(*entry)
         if rev == 0:
-            v_fmt = revlog_constants.INDEX_HEADER
-            v_bin = v_fmt.pack(header)
-            p = v_bin + p[v_fmt.size :]
+            p = p[revlog_constants.INDEX_HEADER.size :]
         return p
 
 
@@ -286,14 +289,12 @@ 
             msg = b"cannot rewrite entries outside of this transaction"
             raise KeyError(msg)
 
-    def entry_binary(self, rev, header):
+    def entry_binary(self, rev):
         """return the raw binary string representing a revision"""
         entry = self[rev]
         p = revlog_constants.INDEX_ENTRY_V2.pack(*entry)
         if rev == 0:
-            v_fmt = revlog_constants.INDEX_HEADER
-            v_bin = v_fmt.pack(header)
-            p = v_bin + p[v_fmt.size :]
+            p = p[revlog_constants.INDEX_HEADER.size :]
         return p
 
 
diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c
--- a/mercurial/cext/revlog.c
+++ b/mercurial/cext/revlog.c
@@ -343,18 +343,28 @@ 
 	}
 }
 /*
+ * Pack header information in binary
+ */
+static PyObject *index_pack_header(indexObject *self, PyObject *args)
+{
+	int header;
+	char out[4];
+	if (!PyArg_ParseTuple(args, "I", &header)) {
+		return NULL;
+	}
+	putbe32(header, out);
+	return PyBytes_FromStringAndSize(out, 4);
+}
+/*
  * Return the raw binary string representing a revision
  */
-static PyObject *index_entry_binary(indexObject *self, PyObject *args)
+static PyObject *index_entry_binary(indexObject *self, PyObject *value)
 {
 	long rev;
-	int header;
 	const char *data;
-	char entry[v2_hdrsize];
-
 	Py_ssize_t length = index_length(self);
 
-	if (!PyArg_ParseTuple(args, "lI", &rev, &header)) {
+	if (!pylong_to_long(value, &rev)) {
 		return NULL;
 	}
 	if (rev < 0 || rev >= length) {
@@ -367,10 +377,8 @@ 
 	if (data == NULL)
 		return NULL;
 	if (rev == 0) {
-		/* put the header at the start of the first entry */
-		memcpy(entry, data, self->hdrsize);
-		putbe32(header, entry);
-		return PyBytes_FromStringAndSize(entry, self->hdrsize);
+		/* the header is eating the start of the first entry */
+		return PyBytes_FromStringAndSize(data + 4, self->hdrsize - 4);
 	}
 	return PyBytes_FromStringAndSize(data, self->hdrsize);
 }
@@ -2891,8 +2899,10 @@ 
     {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
      "find length of shortest hex nodeid of a binary ID"},
     {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
-    {"entry_binary", (PyCFunction)index_entry_binary, METH_VARARGS,
+    {"entry_binary", (PyCFunction)index_entry_binary, METH_O,
      "return an entry in binary form"},
+    {"pack_header", (PyCFunction)index_pack_header, METH_VARARGS,
+     "pack the revlog header information into binary"},
     {NULL} /* Sentinel */
 };