Patchwork D10568: revlog: make the index always return the same tuple

login
register
mail settings
Submitter phabricator
Date May 3, 2021, 11:51 a.m.
Message ID <differential-rev-PHID-DREV-s2tlvcouhecjjgukb663-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/48886/
State Superseded
Headers show

Comments

phabricator - May 3, 2021, 11:51 a.m.
marmoute created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  It is simpler to manage the difference in on-disk format in the internal index
  code itself; doing so lets the rest of the code always handle the same object.
  
  This will become even more important when the data we store is entirely
  different (for example, the changelog does not need the "linkrev" field).
  
  We start with item reading; we will deal with item writing in the next
  changesets.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D10568

AFFECTED FILES
  mercurial/cext/revlog.c
  mercurial/pure/parsers.py
  mercurial/revlogutils/revlogv0.py
  mercurial/unionrepo.py
  tests/test-parseindex2.py

CHANGE DETAILS




To: marmoute, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/tests/test-parseindex2.py b/tests/test-parseindex2.py
--- a/tests/test-parseindex2.py
+++ b/tests/test-parseindex2.py
@@ -49,6 +49,7 @@ 
         cache = (0, data)
         while off <= l:
             e = struct.unpack(indexformatng, data[off : off + s])
+            e = e + (0, 0)
             nodemap[e[7]] = n
             append(e)
             n += 1
@@ -58,6 +59,7 @@ 
     else:
         while off <= l:
             e = struct.unpack(indexformatng, data[off : off + s])
+            e = e + (0, 0)
             nodemap[e[7]] = n
             append(e)
             n += 1
@@ -240,7 +242,7 @@ 
                 break
 
     def testminusone(self):
-        want = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid)
+        want = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid, 0, 0)
         index, junk = parsers.parse_index2(data_inlined, True)
         got = index[-1]
         self.assertEqual(want, got)  # inline data
diff --git a/mercurial/unionrepo.py b/mercurial/unionrepo.py
--- a/mercurial/unionrepo.py
+++ b/mercurial/unionrepo.py
@@ -54,7 +54,18 @@ 
         for rev2 in self.revlog2:
             rev = self.revlog2.index[rev2]
             # rev numbers - in revlog2, very different from self.rev
-            _start, _csize, rsize, base, linkrev, p1rev, p2rev, node = rev
+            (
+                _start,
+                _csize,
+                rsize,
+                base,
+                linkrev,
+                p1rev,
+                p2rev,
+                node,
+                _sdo,
+                _sds,
+            ) = rev
             flags = _start & 0xFFFF
 
             if linkmapper is None:  # link is to same revlog
diff --git a/mercurial/revlogutils/revlogv0.py b/mercurial/revlogutils/revlogv0.py
--- a/mercurial/revlogutils/revlogv0.py
+++ b/mercurial/revlogutils/revlogv0.py
@@ -135,6 +135,8 @@ 
             nodemap.get(e[4], node.nullrev),
             nodemap.get(e[5], node.nullrev),
             e[6],
+            0,  # no side data support
+            0,  # no side data support
         )
         index.append(e2)
         nodemap[e[6]] = n
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -53,7 +53,7 @@ 
     # Size of a C long int, platform independent
     int_size = struct.calcsize(b'>i')
     # An empty index entry, used as a default value to be overridden, or nullrev
-    null_item = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid)
+    null_item = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid, 0, 0)
 
     @util.propertycache
     def entry_size(self):
@@ -122,11 +122,16 @@ 
         else:
             index = self._calculate_index(i)
             data = self._data[index : index + self.entry_size]
-        r = self.index_format.unpack(data)
+        r = self._unpack_entry(data)
         if self._lgt and i == 0:
             r = (offset_type(0, gettype(r[0])),) + r[1:]
         return r
 
+    def _unpack_entry(self, data):
+        r = self.index_format.unpack(data)
+        r = r + (0, 0)
+        return r
+
     def pack_header(self, header):
         """pack header information as binary"""
         v_fmt = revlog_constants.INDEX_HEADER
@@ -135,7 +140,7 @@ 
     def entry_binary(self, rev):
         """return the raw binary string representing a revision"""
         entry = self[rev]
-        p = revlog_constants.INDEX_ENTRY_V1.pack(*entry)
+        p = revlog_constants.INDEX_ENTRY_V1.pack(*entry[:8])
         if rev == 0:
             p = p[revlog_constants.INDEX_HEADER.size :]
         return p
@@ -266,7 +271,6 @@ 
 
 class Index2Mixin(object):
     index_format = revlog_constants.INDEX_ENTRY_V2
-    null_item = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid, 0, 0)
 
     def replace_sidedata_info(
         self, i, sidedata_offset, sidedata_length, offset_flags
@@ -292,6 +296,9 @@ 
             msg = b"cannot rewrite entries outside of this transaction"
             raise KeyError(msg)
 
+    def _unpack_entry(self, data):
+        return self.index_format.unpack(data)
+
     def entry_binary(self, rev):
         """return the raw binary string representing a revision"""
         entry = self[rev]
diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c
--- a/mercurial/cext/revlog.c
+++ b/mercurial/cext/revlog.c
@@ -335,18 +335,17 @@ 
 	c_node_id = data + 32;
 
 	if (self->entry_size == v1_entry_size) {
-		return Py_BuildValue(v1_tuple_format, offset_flags, comp_len,
-		                     uncomp_len, base_rev, link_rev, parent_1,
-		                     parent_2, c_node_id, self->nodelen);
+		sidedata_offset = 0;
+		sidedata_comp_len = 0;
 	} else {
 		sidedata_offset = getbe64(data + 64);
 		sidedata_comp_len = getbe32(data + 72);
-
-		return Py_BuildValue(v2_tuple_format, offset_flags, comp_len,
-		                     uncomp_len, base_rev, link_rev, parent_1,
-		                     parent_2, c_node_id, self->nodelen,
-		                     sidedata_offset, sidedata_comp_len);
 	}
+
+	return Py_BuildValue(v2_tuple_format, offset_flags, comp_len,
+	                     uncomp_len, base_rev, link_rev, parent_1, parent_2,
+	                     c_node_id, self->nodelen, sidedata_offset,
+	                     sidedata_comp_len);
 }
 /*
  * Pack header information in binary
@@ -2769,15 +2768,9 @@ 
 		self->entry_size = v1_entry_size;
 	}
 
-	if (self->format_version == format_v1) {
-		self->nullentry =
-		    Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0, -1,
-		                  -1, -1, -1, nullid, self->nodelen);
-	} else {
-		self->nullentry =
-		    Py_BuildValue(PY23("iiiiiiis#ii", "iiiiiiiy#ii"), 0, 0, 0,
-		                  -1, -1, -1, -1, nullid, self->nodelen, 0, 0);
-	}
+	self->nullentry =
+	    Py_BuildValue(PY23("iiiiiiis#ii", "iiiiiiiy#ii"), 0, 0, 0, -1, -1,
+	                  -1, -1, nullid, self->nodelen, 0, 0);
 
 	if (!self->nullentry)
 		return -1;