Patchwork D9846: cext: add support for revlogv2

login
register
mail settings
Submitter phabricator
Date Jan. 20, 2021, 8:46 p.m.
Message ID <differential-rev-PHID-DREV-yrezmdbloxdtoa6nq3ro-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/48155/
State New
Headers show

Comments

phabricator - Jan. 20, 2021, 8:46 p.m.
Alphare created this revision.
Herald added a reviewer: indygreg.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D9846

AFFECTED FILES
  mercurial/cext/parsers.c
  mercurial/cext/revlog.c
  mercurial/revlog.py

CHANGE DETAILS




To: Alphare, indygreg, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -69,7 +69,6 @@ 
     templatefilters,
     util,
 )
-from .pure import parsers as pureparsers
 from .interfaces import (
     repository,
     util as interfaceutil,
diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c
--- a/mercurial/cext/revlog.c
+++ b/mercurial/cext/revlog.c
@@ -98,6 +98,7 @@ 
 	int ntlookups;          /* # lookups */
 	int ntmisses;           /* # lookups that miss the cache */
 	int inlined;
+	long hdrsize; /* size of index headers; differs between v1 and v2 formats */
 };
 
 static Py_ssize_t index_length(const indexObject *self)
@@ -113,14 +114,19 @@ 
 static int index_find_node(indexObject *self, const char *node);
 
 #if LONG_MAX == 0x7fffffffL
-static const char *const tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
+static const char *const v1_tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
+static const char *const v2_tuple_format = PY23("Kiiiiiis#KiKi", "Kiiiiiiy#KiKi");
 #else
-static const char *const tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
+static const char *const v1_tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
+static const char *const v2_tuple_format = PY23("kiiiiiis#kiki", "kiiiiiiy#kiki");
 #endif
 
 /* A RevlogNG v1 index entry is 64 bytes long. */
 static const long v1_hdrsize = 64;
 
+/* A Revlogv2 index entry is 96 bytes long. */
+static const long v2_hdrsize = 96;
+
 static void raise_revlog_error(void)
 {
 	PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
@@ -157,7 +163,7 @@ 
 static const char *index_deref(indexObject *self, Py_ssize_t pos)
 {
 	if (pos >= self->length)
-		return self->added + (pos - self->length) * v1_hdrsize;
+		return self->added + (pos - self->length) * self->hdrsize;
 
 	if (self->inlined && pos > 0) {
 		if (self->offsets == NULL) {
@@ -174,7 +180,7 @@ 
 		return self->offsets[pos];
 	}
 
-	return (const char *)(self->buf.buf) + pos * v1_hdrsize;
+	return (const char *)(self->buf.buf) + pos * self->hdrsize;
 }
 
 /*
@@ -280,8 +286,9 @@ 
  */
 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
 {
-	uint64_t offset_flags;
-	int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
+	uint64_t offset_flags, sidedata_offset, unified_revlog_id;
+	int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
+    sidedata_comp_len, rank;
 	const char *c_node_id;
 	const char *data;
 	Py_ssize_t length = index_length(self);
@@ -320,9 +327,22 @@ 
 	parent_2 = getbe32(data + 28);
 	c_node_id = data + 32;
 
-	return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
-	                     base_rev, link_rev, parent_1, parent_2, c_node_id,
-	                     self->nodelen);
+  if (self->hdrsize == v1_hdrsize) {
+    return Py_BuildValue(v1_tuple_format, offset_flags, comp_len, uncomp_len,
+                         base_rev, link_rev, parent_1, parent_2, c_node_id,
+                         self->nodelen);
+  } else {
+    unified_revlog_id = getbe64(data + 64);
+    rank = getbe32(data + 72);
+    sidedata_offset = getbe64(data + 76);
+    sidedata_comp_len = getbe32(data + 84);
+
+    return Py_BuildValue(v2_tuple_format, offset_flags, comp_len,
+                         uncomp_len, base_rev, link_rev, parent_1,
+                         parent_2, c_node_id, self->nodelen,
+                         unified_revlog_id, rank,
+                         sidedata_offset, sidedata_comp_len);
+  }
 }
 
 /*
@@ -373,18 +393,30 @@ 
 
 static PyObject *index_append(indexObject *self, PyObject *obj)
 {
-	uint64_t offset_flags;
+	uint64_t offset_flags, unified_revlog_id, sidedata_offset;
 	int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
-	Py_ssize_t c_node_id_len;
+	Py_ssize_t c_node_id_len, rank, sidedata_comp_len;
 	const char *c_node_id;
 	char *data;
 
-	if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len,
-	                      &uncomp_len, &base_rev, &link_rev, &parent_1,
-	                      &parent_2, &c_node_id, &c_node_id_len)) {
-		PyErr_SetString(PyExc_TypeError, "8-tuple required");
-		return NULL;
+  if (self->hdrsize == v1_hdrsize) {
+    if (!PyArg_ParseTuple(obj, v1_tuple_format, &offset_flags, &comp_len,
+                          &uncomp_len, &base_rev, &link_rev, &parent_1,
+                          &parent_2, &c_node_id, &c_node_id_len)) {
+      PyErr_SetString(PyExc_TypeError, "8-tuple required");
+      return NULL;
+    }
+	} else {
+    if (!PyArg_ParseTuple(obj, v2_tuple_format, &offset_flags, &comp_len,
+                          &uncomp_len, &base_rev, &link_rev, &parent_1,
+                          &parent_2, &c_node_id, &c_node_id_len,
+                          &unified_revlog_id, &rank, &sidedata_offset,
+                          &sidedata_comp_len)) {
+      PyErr_SetString(PyExc_TypeError, "12-tuple required");
+      return NULL;
+    }
 	}
+
 	if (c_node_id_len != self->nodelen) {
 		PyErr_SetString(PyExc_TypeError, "invalid node");
 		return NULL;
@@ -394,14 +426,14 @@ 
 		size_t new_added_length =
 		    self->added_length ? self->added_length * 2 : 4096;
 		void *new_added =
-		    PyMem_Realloc(self->added, new_added_length * v1_hdrsize);
+		    PyMem_Realloc(self->added, new_added_length * self->hdrsize);
 		if (!new_added)
 			return PyErr_NoMemory();
 		self->added = new_added;
 		self->added_length = new_added_length;
 	}
 	rev = self->length + self->new_length;
-	data = self->added + v1_hdrsize * self->new_length++;
+	data = self->added + self->hdrsize * self->new_length++;
 	putbe32(offset_flags >> 32, data);
 	putbe32(offset_flags & 0xffffffffU, data + 4);
 	putbe32(comp_len, data + 8);
@@ -411,7 +443,17 @@ 
 	putbe32(parent_1, data + 24);
 	putbe32(parent_2, data + 28);
 	memcpy(data + 32, c_node_id, c_node_id_len);
+	/* Padding since SHA-1 is only 20 bytes for now */
 	memset(data + 32 + c_node_id_len, 0, 32 - c_node_id_len);
+	if (self->hdrsize != v1_hdrsize) {
+		putbe64(unified_revlog_id, data + 64);
+		putbe32(rank, data + 72);
+		putbe64(sidedata_offset, data + 76);
+		putbe32(sidedata_comp_len, data + 84);
+	  /* Zero-fill the unused tail so the entry spans the full header size */
+		memset(data + 88, 0, self->hdrsize - 88);
+	}
+
 
 	if (self->ntinitialized)
 		nt_insert(&self->nt, c_node_id, rev);
@@ -2563,14 +2605,17 @@ 
 	const char *data = (const char *)self->buf.buf;
 	Py_ssize_t pos = 0;
 	Py_ssize_t end = self->buf.len;
-	long incr = v1_hdrsize;
+	long incr = self->hdrsize;
 	Py_ssize_t len = 0;
 
-	while (pos + v1_hdrsize <= end && pos >= 0) {
-		uint32_t comp_len;
+	while (pos + self->hdrsize <= end && pos >= 0) {
+		uint32_t comp_len, sidedata_comp_len = 0;
 		/* 3rd element of header is length of compressed inline data */
 		comp_len = getbe32(data + pos + 8);
-		incr = v1_hdrsize + comp_len;
+		if (self->hdrsize == v2_hdrsize) {
+			sidedata_comp_len = getbe32(data + pos + 84);
+		}
+		incr = self->hdrsize + comp_len + sidedata_comp_len;
 		if (offsets)
 			offsets[len] = data + pos;
 		len++;
@@ -2586,11 +2631,13 @@ 
 	return len;
 }
 
-static int index_init(indexObject *self, PyObject *args)
+static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
 {
-	PyObject *data_obj, *inlined_obj;
+	PyObject *data_obj, *inlined_obj, *revlogv2;
 	Py_ssize_t size;
 
+	static char *kwlist[] = {"data", "inlined", "revlogv2", NULL};
+
 	/* Initialize before argument-checking to avoid index_dealloc() crash.
 	 */
 	self->added = NULL;
@@ -2606,7 +2653,9 @@ 
 	self->nodelen = 20;
 	self->nullentry = NULL;
 
-	if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
+  revlogv2 = NULL;
+  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|O", kwlist,
+                                   &data_obj, &inlined_obj, &revlogv2))
 		return -1;
 	if (!PyObject_CheckBuffer(data_obj)) {
 		PyErr_SetString(PyExc_TypeError,
@@ -2618,8 +2667,22 @@ 
 		return -1;
 	}
 
-	self->nullentry = Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0,
-	                                -1, -1, -1, -1, nullid, self->nodelen);
+	if (revlogv2 && PyObject_IsTrue(revlogv2)) {
+		self->hdrsize = v2_hdrsize;
+	} else {
+		self->hdrsize = v1_hdrsize;
+	}
+
+	if (self->hdrsize == v1_hdrsize) {
+		self->nullentry =
+		    Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0, -1,
+		                  -1, -1, -1, nullid, self->nodelen);
+	} else {
+		self->nullentry =
+		    Py_BuildValue(PY23("iiiiiiis#iiii", "iiiiiiiy#iiii"), 0, 0, 0,
+		                  -1, -1, -1, -1, nullid, self->nodelen, 0, 0, 0, 0);
+	}
+
 	if (!self->nullentry)
 		return -1;
 	PyObject_GC_UnTrack(self->nullentry);
@@ -2641,11 +2704,11 @@ 
 			goto bail;
 		self->length = len;
 	} else {
-		if (size % v1_hdrsize) {
+		if (size % self->hdrsize) {
 			PyErr_SetString(PyExc_ValueError, "corrupt index file");
 			goto bail;
 		}
-		self->length = size / v1_hdrsize;
+		self->length = size / self->hdrsize;
 	}
 
 	return 0;
@@ -2797,16 +2860,16 @@ 
 };
 
 /*
- * returns a tuple of the form (index, index, cache) with elements as
+ * returns a tuple of the form (index, cache) with elements as
  * follows:
  *
- * index: an index object that lazily parses RevlogNG records
+ * index: an index object that lazily parses Revlog (v1 or v2) records
  * cache: if data is inlined, a tuple (0, index_file_content), else None
  *        index_file_content could be a string, or a buffer
  *
  * added complications are for backwards compatibility
  */
-PyObject *parse_index2(PyObject *self, PyObject *args)
+PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
 {
 	PyObject *cache = NULL;
 	indexObject *idx;
@@ -2816,7 +2879,7 @@ 
 	if (idx == NULL)
 		goto bail;
 
-	ret = index_init(idx, args);
+	ret = index_init(idx, args, kwargs);
 	if (ret == -1)
 		goto bail;
 
diff --git a/mercurial/cext/parsers.c b/mercurial/cext/parsers.c
--- a/mercurial/cext/parsers.c
+++ b/mercurial/cext/parsers.c
@@ -638,7 +638,7 @@ 
 PyObject *encodedir(PyObject *self, PyObject *args);
 PyObject *pathencode(PyObject *self, PyObject *args);
 PyObject *lowerencode(PyObject *self, PyObject *args);
-PyObject *parse_index2(PyObject *self, PyObject *args);
+PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs);
 
 static PyMethodDef methods[] = {
     {"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
@@ -646,7 +646,8 @@ 
      "create a set containing non-normal and other parent entries of given "
      "dirstate\n"},
     {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
-    {"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
+    {"parse_index2", (PyCFunction)parse_index2, METH_VARARGS | METH_KEYWORDS,
+     "parse a revlog index\n"},
     {"isasciistr", isasciistr, METH_VARARGS, "check if an ASCII string\n"},
     {"asciilower", asciilower, METH_VARARGS, "lowercase an ASCII string\n"},
     {"asciiupper", asciiupper, METH_VARARGS, "uppercase an ASCII string\n"},