Patchwork D10031: revlog-index: add `replace` method

login
register
mail settings
Submitter phabricator
Date Feb. 19, 2021, 11:17 a.m.
Message ID <differential-rev-PHID-DREV-u5axsxsi6c4esnxxb7yd-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/48345/
State New
Headers show

Comments

phabricator - Feb. 19, 2021, 11:17 a.m.
Alphare created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  During a `pull` operation where the server does not provide sidedata, a client
  that requires sidedata should generate it on the fly. In the generic case, we
  need to wait for the changelog + manifests + filelogs to be added, since we
  don't know what the sidedata computers might need: this means rewriting the
  index entries from within the pull transaction (and no further back) right
  after we've added them.
  
  The Python implementation does not guard against the accidental replacement of
  an entry that is not within the transaction, but the C implementation does.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D10031

AFFECTED FILES
  mercurial/cext/revlog.c
  mercurial/pure/parsers.py

CHANGE DETAILS




To: Alphare, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -116,6 +116,23 @@ 
             r = (offset_type(0, gettype(r[0])),) + r[1:]
         return r
 
+    def replace(self, i, tup):
+        """
+        Overwrite the index entry for revision `i` with the packed form of
+        `tup`. Only meant for sidedata rewriting, from within the transaction
+        that creates revision `i`; this code does not verify that `i` is new.
+        """
+        if i < 0:
+            raise KeyError
+        self._check_index(i)
+        extra_pos = i - self._lgt
+        if extra_pos >= 0:
+            self._extra[extra_pos] = _pack(self.index_format, *tup)
+            return
+        start = self._calculate_index(i)
+        packed = _pack(self.index_format, *tup)
+        self._data[start : start + self.index_size] = packed
+
 
 class IndexObject(BaseIndexObject):
     def __init__(self, data):
diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c
--- a/mercurial/cext/revlog.c
+++ b/mercurial/cext/revlog.c
@@ -464,6 +464,80 @@ 
 	Py_RETURN_NONE;
 }
 
+/* Replace the index entry of `rev` with the values of a 12-tuple; `args` is
+   `(rev, tuple)`. Only for sidedata rewriting inside the transaction that
+   creates `rev`: entries already on disk are refused with IndexError.
+   Returns None on success and NULL on failure. */
+static PyObject *index_replace(indexObject *self, PyObject *args)
+{
+	uint64_t offset_flags, unified_revlog_id, sidedata_offset;
+	int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
+	/* `rev` is parsed with the "n" format, which stores a Py_ssize_t */
+	Py_ssize_t rev, c_node_id_len, rank, sidedata_comp_len;
+	const char *c_node_id;
+	char *data;
+	PyObject *obj;
+
+	if (self->hdrsize == v1_hdrsize || self->inlined) {
+		/* There is a bug in the transaction handling when going from
+		 * an inline revlog to a separate index and data file. Turn it
+		 * off until it's fixed, since v2 revlogs sometimes get
+		 * rewritten on exchange. See issue6485. */
+		raise_revlog_error();
+		return NULL;
+	}
+	if (!PyArg_ParseTuple(args, "nO", &rev, &obj))
+		return NULL;
+
+	if (rev < 0 || rev >= index_length(self)) {
+		PyErr_SetString(PyExc_IndexError, "revision outside index");
+		return NULL;
+	}
+	if (rev < self->length) {
+		PyErr_SetString(
+		    PyExc_IndexError,
+		    "cannot rewrite entries outside of this transaction");
+		return NULL;
+	}
+
+	if (!PyArg_ParseTuple(obj, v2_tuple_format, &offset_flags, &comp_len,
+	                      &uncomp_len, &base_rev, &link_rev, &parent_1,
+	                      &parent_2, &c_node_id, &c_node_id_len,
+	                      &unified_revlog_id, &rank, &sidedata_offset,
+	                      &sidedata_comp_len)) {
+		PyErr_SetString(PyExc_TypeError, "12-tuple required");
+		return NULL;
+	}
+
+	if (c_node_id_len != self->nodelen) {
+		PyErr_SetString(PyExc_TypeError, "invalid node");
+		return NULL;
+	}
+
+	/* Rewrite the newly added node, offset from the "already on-disk"
+	 * length */
+	data = self->added + self->hdrsize * (rev - self->length);
+	putbe32(offset_flags >> 32, data);
+	putbe32(offset_flags & 0xffffffffU, data + 4);
+	putbe32(comp_len, data + 8);
+	putbe32(uncomp_len, data + 12);
+	putbe32(base_rev, data + 16);
+	putbe32(link_rev, data + 20);
+	putbe32(parent_1, data + 24);
+	putbe32(parent_2, data + 28);
+	memcpy(data + 32, c_node_id, c_node_id_len);
+	/* Padding since SHA-1 is only 20 bytes for now */
+	memset(data + 32 + c_node_id_len, 0, 32 - c_node_id_len);
+	putbe64(unified_revlog_id, data + 64);
+	putbe32(rank, data + 72);
+	putbe64(sidedata_offset, data + 76);
+	putbe32(sidedata_comp_len, data + 84);
+	/* Padding for 96 bytes alignment */
+	memset(data + 88, 0, self->hdrsize - 88);
+
+	Py_RETURN_NONE;
+}
+
 static PyObject *index_stats(indexObject *self)
 {
 	PyObject *obj = PyDict_New();
@@ -2795,6 +2869,8 @@ 
      "compute phases"},
     {"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
      "reachableroots"},
+    {"replace", (PyCFunction)index_replace, METH_VARARGS,
+     "replace an existing index entry with a new value"},
     {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
      "get head revisions"}, /* Can do filtering since 3.2 */
     {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,