Patchwork D10508: revlog: add an `entry_binary` method on index

login
register
mail settings
Submitter phabricator
Date April 28, 2021, 3:08 p.m.
Message ID <differential-rev-PHID-DREV-3f7f3siqopjgbllpnpb6-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/48828/
State Superseded
Headers show

Comments

phabricator - April 28, 2021, 3:08 p.m.
marmoute created this revision.
Herald added a reviewer: indygreg.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  The revlog index is already responsible for unpacking binary entries, so it
  would be simpler to make it responsible for packing them as well. In practice,
  the C version of the index already does this internally.
  
  We introduce an "entry_binary" method that returns the binary version of an
  existing revision. The method currently also needs to take the revlog header
  to deal with the "first revision" special case. We will introduce further
  refactoring in a later changeset to split that logic out.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D10508

AFFECTED FILES
  mercurial/cext/revlog.c
  mercurial/pure/parsers.py
  mercurial/revlog.py
  rust/hg-cpython/src/revlog.rs

CHANGE DETAILS




To: marmoute, indygreg, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/rust/hg-cpython/src/revlog.rs b/rust/hg-cpython/src/revlog.rs
--- a/rust/hg-cpython/src/revlog.rs
+++ b/rust/hg-cpython/src/revlog.rs
@@ -172,6 +172,11 @@ 
         self.call_cindex(py, "clearcaches", args, kw)
     }
 
+    /// return the raw binary string representing a revision
+    def entry_binary(&self, *args, **kw) -> PyResult<PyObject> {
+        self.call_cindex(py, "entry_binary", args, kw)
+    }
+
     /// get an index entry
     def get(&self, *args, **kw) -> PyResult<PyObject> {
         self.call_cindex(py, "get", args, kw)
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -268,6 +268,24 @@ 
             return (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid)
         return list.__getitem__(self, i)
 
+    def entry_binary(self, rev, header):
+        """return the raw binary string representing a revision"""
+        entry = self[rev]
+        if gettype(entry[0]):
+            raise error.RevlogError(
+                _(b'index entry flags need revlog version 1')
+            )
+        e2 = (
+            getoffset(entry[0]),
+            entry[1],
+            entry[3],
+            entry[4],
+            self[entry[5]][7],
+            self[entry[6]][7],
+            entry[7],
+        )
+        return INDEX_ENTRY_V0.pack(*e2)
+
 
 class revlogoldio(object):
     def parseindex(self, data, inline):
@@ -298,29 +316,6 @@ 
         index = revlogoldindex(index)
         return index, None
 
-    def packentry(self, entry, node, version, rev):
-        """return the binary representation of an entry
-
-        entry:   a tuple containing all the values (see index.__getitem__)
-        node:    a callback to convert a revision to nodeid
-        version: the changelog version
-        rev:     the revision number
-        """
-        if gettype(entry[0]):
-            raise error.RevlogError(
-                _(b'index entry flags need revlog version 1')
-            )
-        e2 = (
-            getoffset(entry[0]),
-            entry[1],
-            entry[3],
-            entry[4],
-            node(entry[5]),
-            node(entry[6]),
-            entry[7],
-        )
-        return INDEX_ENTRY_V0.pack(*e2)
-
 
 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
 # signed integer)
@@ -333,24 +328,12 @@ 
         index, cache = parsers.parse_index2(data, inline)
         return index, cache
 
-    def packentry(self, entry, node, version, rev):
-        p = INDEX_ENTRY_V1.pack(*entry)
-        if rev == 0:
-            p = INDEX_HEADER.pack(version) + p[4:]
-        return p
-
 
 class revlogv2io(object):
     def parseindex(self, data, inline):
         index, cache = parsers.parse_index2(data, inline, revlogv2=True)
         return index, cache
 
-    def packentry(self, entry, node, version, rev):
-        p = INDEX_ENTRY_V2.pack(*entry)
-        if rev == 0:
-            p = INDEX_HEADER.pack(version) + p[4:]
-        return p
-
 
 NodemapRevlogIO = None
 
@@ -2068,7 +2051,7 @@ 
             self._inline = False
             io = self._io
             for i in self:
-                e = io.packentry(self.index[i], self.node, self.version, i)
+                e = self.index.entry_binary(i, self.version)
                 fp.write(e)
 
             # the temp file replace the real index when we exit the context
@@ -2390,7 +2373,7 @@ 
             e = e[:8]
 
         self.index.append(e)
-        entry = self._io.packentry(e, self.node, self.version, curr)
+        entry = self.index.entry_binary(curr, self.version)
         self._writeentry(
             transaction,
             ifh,
@@ -3243,5 +3226,5 @@ 
             for i, entry in enumerate(new_entries):
                 rev = startrev + i
                 self.index.replace_sidedata_info(rev, entry[8], entry[9])
-                packed = self._io.packentry(entry, self.node, self.version, rev)
+                packed = self.index.entry_binary(rev, self.version)
                 fp.write(packed)
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -127,10 +127,24 @@ 
             r = (offset_type(0, gettype(r[0])),) + r[1:]
         return r
 
+    def entry_binary(self, rev, header):
+        """return the raw binary string representing a revision"""
+        entry = self[rev]
+        p = revlog_constants.INDEX_ENTRY_V1.pack(*entry)
+        if rev == 0:
+            v_fmt = revlog_constants.INDEX_HEADER
+            v_bin = v_fmt.pack(header)
+            p = v_bin + p[v_fmt.size :]
+        return p
+
 
 class IndexObject(BaseIndexObject):
     def __init__(self, data):
-        assert len(data) % self.entry_size == 0
+        assert len(data) % self.entry_size == 0, (
+            len(data),
+            self.entry_size,
+            len(data) % self.entry_size,
+        )
         self._data = data
         self._lgt = len(data) // self.entry_size
         self._extra = []
@@ -272,6 +286,16 @@ 
             msg = b"cannot rewrite entries outside of this transaction"
             raise KeyError(msg)
 
+    def entry_binary(self, rev, header):
+        """return the raw binary string representing a revision"""
+        entry = self[rev]
+        p = revlog_constants.INDEX_ENTRY_V2.pack(*entry)
+        if rev == 0:
+            v_fmt = revlog_constants.INDEX_HEADER
+            v_bin = v_fmt.pack(header)
+            p = v_bin + p[v_fmt.size :]
+        return p
+
 
 class IndexObject2(Index2Mixin, IndexObject):
     pass
diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c
--- a/mercurial/cext/revlog.c
+++ b/mercurial/cext/revlog.c
@@ -342,6 +342,38 @@ 
 		                     sidedata_offset, sidedata_comp_len);
 	}
 }
+/*
+ * Return the raw binary string representing a revision
+ */
+static PyObject *index_entry_binary(indexObject *self, PyObject *args)
+{
+	long rev;
+	int header;
+	const char *data;
+	char entry[v2_hdrsize];
+
+	Py_ssize_t length = index_length(self);
+
+	if (!PyArg_ParseTuple(args, "lI", &rev, &header)) {
+		return NULL;
+	}
+	if (rev < 0 || rev >= length) {
+		PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
+		             rev);
+		return NULL;
+	};
+
+	data = index_deref(self, rev);
+	if (data == NULL)
+		return NULL;
+	if (rev == 0) {
+		// put the header at the start of the first entry
+		memcpy(entry, data, self->hdrsize);
+		putbe32(header, entry);
+		return PyBytes_FromStringAndSize(entry, self->hdrsize);
+	}
+	return PyBytes_FromStringAndSize(data, self->hdrsize);
+}
 
 /*
  * Return the hash of node corresponding to the given rev.
@@ -2859,6 +2891,8 @@ 
     {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
      "find length of shortest hex nodeid of a binary ID"},
     {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
+    {"entry_binary", (PyCFunction)index_entry_binary, METH_VARARGS,
+     "return an entry in binary form"},
     {NULL} /* Sentinel */
 };