Patchwork D10309: revlog: replace revlog._io.size with a new revlog.index.entry_size

login
register
mail settings
Submitter phabricator
Date April 5, 2021, 3:50 p.m.
Message ID <differential-rev-PHID-DREV-qk47pd4kfc6r33j4ejqe-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/48626/
State Superseded
Headers show

Comments

phabricator - April 5, 2021, 3:50 p.m.
marmoute created this revision.
Herald added a reviewer: indygreg.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  The `revlogio` class is mostly a relic from the past. Once in charge of the full
  revlog related Input/Output code, that class gradually lost responsibilities to
  the point where more IO are now done by `revlog.index` objects or revlog objects
  themself. I would like to ultimately remove the `revlogio` class, to do so I
  start simple with move the "entry size" information on the index. (The index is
  already responsible of the binary unpacking, so it knows the size.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D10309

AFFECTED FILES
  contrib/perf.py
  mercurial/cext/revlog.c
  mercurial/pure/parsers.py
  mercurial/revlog.py
  rust/hg-cpython/src/revlog.rs

CHANGE DETAILS




To: marmoute, indygreg, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/rust/hg-cpython/src/revlog.rs b/rust/hg-cpython/src/revlog.rs
--- a/rust/hg-cpython/src/revlog.rs
+++ b/rust/hg-cpython/src/revlog.rs
@@ -12,8 +12,8 @@ 
 use cpython::{
     buffer::{Element, PyBuffer},
     exc::{IndexError, ValueError},
-    ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyModule, PyObject,
-    PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
+    ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule,
+    PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
 };
 use hg::{
     nodemap::{Block, NodeMapError, NodeTree},
@@ -285,6 +285,10 @@ 
         self.inner_update_nodemap_data(py, docket, nm_data)
     }
 
+    @property
+    def entry_size(&self) -> PyResult<PyInt> {
+        self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py)
+    }
 
 });
 
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -221,6 +221,8 @@ 
 
 
 class revlogoldindex(list):
+    entry_size = INDEX_ENTRY_V0.size
+
     @property
     def nodemap(self):
         msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
@@ -272,11 +274,8 @@ 
 
 
 class revlogoldio(object):
-    def __init__(self):
-        self.size = INDEX_ENTRY_V0.size
-
     def parseindex(self, data, inline):
-        s = self.size
+        s = INDEX_ENTRY_V0.size
         index = []
         nodemap = nodemaputil.NodeMap({nullid: nullrev})
         n = off = 0
@@ -333,9 +332,6 @@ 
 
 
 class revlogio(object):
-    def __init__(self):
-        self.size = INDEX_ENTRY_V1.size
-
     def parseindex(self, data, inline):
         # call the C implementation to parse the index data
         index, cache = parsers.parse_index2(data, inline)
@@ -349,9 +345,6 @@ 
 
 
 class revlogv2io(object):
-    def __init__(self):
-        self.size = INDEX_ENTRY_V2.size
-
     def parseindex(self, data, inline):
         index, cache = parsers.parse_index2(data, inline, revlogv2=True)
         return index, cache
@@ -1715,8 +1708,8 @@ 
             end = int(iend[0] >> 16) + iend[1]
 
         if self._inline:
-            start += (startrev + 1) * self._io.size
-            end += (endrev + 1) * self._io.size
+            start += (startrev + 1) * self.index.entry_size
+            end += (endrev + 1) * self.index.entry_size
         length = end - start
 
         return start, self._getsegment(start, length, df=df)
@@ -1750,7 +1743,7 @@ 
         start = self.start
         length = self.length
         inline = self._inline
-        iosize = self._io.size
+        iosize = self.index.entry_size
         buffer = util.buffer
 
         l = []
@@ -1978,7 +1971,7 @@ 
         sidedata_size = index_entry[9]
 
         if self._inline:
-            sidedata_offset += self._io.size * (1 + rev)
+            sidedata_offset += self.index.entry_size * (1 + rev)
         if sidedata_size == 0:
             return {}
 
@@ -2078,7 +2071,7 @@ 
             # the temp file replace the real index when we exit the context
             # manager
 
-        tr.replace(self.indexfile, trindex * self._io.size)
+        tr.replace(self.indexfile, trindex * self.index.entry_size)
         nodemaputil.setup_persistent_nodemap(tr, self)
         self._chunkclear()
 
@@ -2332,12 +2325,12 @@ 
                 # offset is "as if" it were in the .d file, so we need to add on
                 # the size of the entry metadata.
                 self._concurrencychecker(
-                    ifh, self.indexfile, offset + curr * self._io.size
+                    ifh, self.indexfile, offset + curr * self.index.entry_size
                 )
             else:
                 # Entries in the .i are a consistent size.
                 self._concurrencychecker(
-                    ifh, self.indexfile, curr * self._io.size
+                    ifh, self.indexfile, curr * self.index.entry_size
                 )
                 self._concurrencychecker(dfh, self.datafile, offset)
 
@@ -2461,7 +2454,7 @@ 
                 dfh.write(sidedata)
             ifh.write(entry)
         else:
-            offset += curr * self._io.size
+            offset += curr * self.index.entry_size
             transaction.add(self.indexfile, offset)
             ifh.write(entry)
             ifh.write(data[0])
@@ -2499,7 +2492,7 @@ 
         if r:
             end = self.end(r - 1)
         ifh = self._indexfp(b"a+")
-        isize = r * self._io.size
+        isize = r * self.index.entry_size
         if self._inline:
             transaction.add(self.indexfile, end + isize)
             dfh = None
@@ -2655,9 +2648,9 @@ 
         end = self.start(rev)
         if not self._inline:
             transaction.add(self.datafile, end)
-            end = rev * self._io.size
+            end = rev * self.index.entry_size
         else:
-            end += rev * self._io.size
+            end += rev * self.index.entry_size
 
         transaction.add(self.indexfile, end)
 
@@ -2696,7 +2689,7 @@ 
             f.seek(0, io.SEEK_END)
             actual = f.tell()
             f.close()
-            s = self._io.size
+            s = self.index.entry_size
             i = max(0, actual // s)
             di = actual - (i * s)
             if self._inline:
@@ -3238,7 +3231,7 @@ 
 
         # rewrite the new index entries
         with self._indexfp(b'w+') as fp:
-            fp.seek(startrev * self._io.size)
+            fp.seek(startrev * self.index.entry_size)
             for i, entry in enumerate(new_entries):
                 rev = startrev + i
                 self.index.replace_sidedata_info(rev, entry[8], entry[9])
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -49,11 +49,13 @@ 
     big_int_size = struct.calcsize(b'>Q')
     # Size of a C long int, platform independent
     int_size = struct.calcsize(b'>i')
-    # Size of the entire index format
-    index_size = revlog_constants.INDEX_ENTRY_V1.size
     # An empty index entry, used as a default value to be overridden, or nullrev
     null_item = (0, 0, 0, -1, -1, -1, -1, nullid)
 
+    @util.propertycache
+    def entry_size(self):
+        return self.index_format.size
+
     @property
     def nodemap(self):
         msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
@@ -116,7 +118,7 @@ 
             data = self._extra[i - self._lgt]
         else:
             index = self._calculate_index(i)
-            data = self._data[index : index + self.index_size]
+            data = self._data[index : index + self.entry_size]
         r = self.index_format.unpack(data)
         if self._lgt and i == 0:
             r = (offset_type(0, gettype(r[0])),) + r[1:]
@@ -125,13 +127,13 @@ 
 
 class IndexObject(BaseIndexObject):
     def __init__(self, data):
-        assert len(data) % self.index_size == 0
+        assert len(data) % self.entry_size == 0
         self._data = data
-        self._lgt = len(data) // self.index_size
+        self._lgt = len(data) // self.entry_size
         self._extra = []
 
     def _calculate_index(self, i):
-        return i * self.index_size
+        return i * self.entry_size
 
     def __delitem__(self, i):
         if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
@@ -140,7 +142,7 @@ 
         self._check_index(i)
         self._stripnodes(i)
         if i < self._lgt:
-            self._data = self._data[: i * self.index_size]
+            self._data = self._data[: i * self.entry_size]
             self._lgt = i
             self._extra = []
         else:
@@ -203,7 +205,7 @@ 
         if lgt is not None:
             self._offsets = [0] * lgt
         count = 0
-        while off <= len(self._data) - self.index_size:
+        while off <= len(self._data) - self.entry_size:
             start = off + self.big_int_size
             (s,) = struct.unpack(
                 b'>i',
@@ -212,7 +214,7 @@ 
             if lgt is not None:
                 self._offsets[count] = off
             count += 1
-            off += self.index_size + s
+            off += self.entry_size + s
         if off != len(self._data):
             raise ValueError(b"corrupted data")
         return count
@@ -244,7 +246,6 @@ 
 
 class Index2Mixin(object):
     index_format = revlog_constants.INDEX_ENTRY_V2
-    index_size = revlog_constants.INDEX_ENTRY_V2.size
     null_item = (0, 0, 0, -1, -1, -1, -1, nullid, 0, 0)
 
     def replace_sidedata_info(self, i, sidedata_offset, sidedata_length):
@@ -280,7 +281,7 @@ 
         if lgt is not None:
             self._offsets = [0] * lgt
         count = 0
-        while off <= len(self._data) - self.index_size:
+        while off <= len(self._data) - self.entry_size:
             start = off + self.big_int_size
             (data_size,) = struct.unpack(
                 b'>i',
@@ -293,7 +294,7 @@ 
             if lgt is not None:
                 self._offsets[count] = off
             count += 1
-            off += self.index_size + data_size + side_data_size
+            off += self.entry_size + data_size + side_data_size
         if off != len(self._data):
             raise ValueError(b"corrupted data")
         return count
diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c
--- a/mercurial/cext/revlog.c
+++ b/mercurial/cext/revlog.c
@@ -15,6 +15,7 @@ 
 #include <stddef.h>
 #include <stdlib.h>
 #include <string.h>
+#include <structmember.h>
 
 #include "bitmanipulation.h"
 #include "charencode.h"
@@ -2867,6 +2868,12 @@ 
     {NULL} /* Sentinel */
 };
 
+static PyMemberDef index_members[] = {
+    {"entry_size", T_LONG, offsetof(indexObject, hdrsize), 0,
+     "size of an index entry"},
+    {NULL}  /* Sentinel */
+};
+
 PyTypeObject HgRevlogIndex_Type = {
     PyVarObject_HEAD_INIT(NULL, 0) /* header */
     "parsers.index",               /* tp_name */
@@ -2896,7 +2903,7 @@ 
     0,                             /* tp_iter */
     0,                             /* tp_iternext */
     index_methods,                 /* tp_methods */
-    0,                             /* tp_members */
+    index_members,         /* tp_members */
     index_getset,                  /* tp_getset */
     0,                             /* tp_base */
     0,                             /* tp_dict */
diff --git a/contrib/perf.py b/contrib/perf.py
--- a/contrib/perf.py
+++ b/contrib/perf.py
@@ -3222,7 +3222,10 @@ 
         start = r.start
         length = r.length
         inline = r._inline
-        iosize = r._io.size
+        try:
+            iosize = r.index.entry_size
+        except AttributeError:
+            iosize = r._io.size
         buffer = util.buffer
 
         chunks = []