Patchwork D9843: revlog: introduce v2 format

login
register
mail settings
Submitter phabricator
Date Jan. 20, 2021, 8:46 p.m.
Message ID <differential-rev-PHID-DREV-amurhkosyqvxydungqof-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/48156/
State New
Headers show

Comments

phabricator - Jan. 20, 2021, 8:46 p.m.
Alphare created this revision.
Herald added a reviewer: indygreg.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  As documented in [1], the revlog v2 format is still tentative and subject to
  change, but we need to lay down its foundations now in order to work on the
  next abstraction layers.
  
  [1] https://www.mercurial-scm.org/wiki/RevlogV2Plan

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D9843

AFFECTED FILES
  mercurial/localrepo.py
  mercurial/pure/parsers.py
  mercurial/requirements.py
  mercurial/revlog.py
  mercurial/revlogutils/constants.py
  tests/test-parseindex2.py
  tests/test-requires.t
  tests/test-revlog-v2.t
  tests/test-revlog.t

CHANGE DETAILS




To: Alphare, indygreg, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/tests/test-revlog.t b/tests/test-revlog.t
--- a/tests/test-revlog.t
+++ b/tests/test-revlog.t
@@ -22,10 +22,10 @@ 
 Unknown version is rejected
 
   >>> with open('.hg/store/00changelog.i', 'wb') as fh:
-  ...     fh.write(b'\x00\x00\x00\x02') and None
+  ...     fh.write(b'\x00\x00\x00\x03') and None
 
   $ hg log
-  abort: unknown version (2) in revlog 00changelog.i
+  abort: unknown version (3) in revlog 00changelog.i
   [50]
 
   $ cd ..
diff --git a/tests/test-revlog-v2.t b/tests/test-revlog-v2.t
--- a/tests/test-revlog-v2.t
+++ b/tests/test-revlog-v2.t
@@ -22,7 +22,7 @@ 
   $ cd empty-repo
   $ cat .hg/requires
   dotencode
-  exp-revlogv2.1
+  exp-revlogv2.2
   fncache
   sparserevlog
   store
@@ -32,10 +32,10 @@ 
 Unknown flags to revlog are rejected
 
   >>> with open('.hg/store/00changelog.i', 'wb') as fh:
-  ...     fh.write(b'\xff\x00\xde\xad') and None
+  ...     fh.write(b'\xff\x00\x00\x02') and None
 
   $ hg log
-  abort: unknown flags (0xff00) in version 57005 revlog 00changelog.i
+  abort: unknown flags (0xff00) in version 2 revlog 00changelog.i
   [50]
 
   $ cd ..
@@ -58,8 +58,8 @@ 
 
   $ f --hexdump --bytes 4 .hg/store/00changelog.i
   .hg/store/00changelog.i:
-  0000: 00 01 de ad                                     |....|
+  0000: 00 01 00 02                                     |....|
 
   $ f --hexdump --bytes 4 .hg/store/data/foo.i
   .hg/store/data/foo.i:
-  0000: 00 01 de ad                                     |....|
+  0000: 00 01 00 02                                     |....|
diff --git a/tests/test-requires.t b/tests/test-requires.t
--- a/tests/test-requires.t
+++ b/tests/test-requires.t
@@ -5,7 +5,7 @@ 
   $ hg commit -m test
   $ rm .hg/requires
   $ hg tip
-  abort: unknown version (2) in revlog 00changelog.i
+  abort: unknown version (65535) in revlog 00changelog.i
   [50]
   $ echo indoor-pool > .hg/requires
   $ hg tip
diff --git a/tests/test-parseindex2.py b/tests/test-parseindex2.py
--- a/tests/test-parseindex2.py
+++ b/tests/test-parseindex2.py
@@ -117,8 +117,8 @@ 
 )
 
 
-def parse_index2(data, inline):
-    index, chunkcache = parsers.parse_index2(data, inline)
+def parse_index2(data, inline, revlogv2=False):
+    index, chunkcache = parsers.parse_index2(data, inline, revlogv2=revlogv2)
     return list(index), chunkcache
 
 
diff --git a/mercurial/revlogutils/constants.py b/mercurial/revlogutils/constants.py
--- a/mercurial/revlogutils/constants.py
+++ b/mercurial/revlogutils/constants.py
@@ -14,9 +14,7 @@ 
 # revlog header flags
 REVLOGV0 = 0
 REVLOGV1 = 1
-# Dummy value until file format is finalized.
-# Reminder: change the bounds check in revlog.__init__ when this is changed.
-REVLOGV2 = 0xDEAD
+REVLOGV2 = 2
 # Shared across v1 and v2.
 FLAG_INLINE_DATA = 1 << 16
 # Only used by v1, implied by v2.
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -69,6 +69,7 @@ 
     templatefilters,
     util,
 )
+from .pure import parsers as pureparsers
 from .interfaces import (
     repository,
     util as interfaceutil,
@@ -364,6 +365,40 @@ 
         return p
 
 
+# index v2:
+#  6 bytes: offset
+#  2 bytes: flags
+#  4 bytes: compressed length
+#  4 bytes: uncompressed length
+#  4 bytes: base rev
+#  4 bytes: link rev
+#  4 bytes: parent 1 rev
+#  4 bytes: parent 2 rev
+# 32 bytes: nodeid
+#  8 bytes: UnifiedRevlog identifier
+#  4 bytes: rank (number of changesets under this one, this one included)
+#  8 bytes: sidedata offset
+#  4 bytes: sidedata compressed length
+#  8 bytes: Padding to align to 96 bytes
+indexformatv2 = struct.Struct(b">Qiiiiii20s12xQiQi8x")
+indexformatv2_pack = indexformatv2.pack
+
+
+class revlogv2io(object):
+    def __init__(self):
+        self.size = indexformatv2.size
+
+    def parseindex(self, data, inline):
+        index, cache = parsers.parse_index2(data, inline, revlogv2=True)
+        return index, cache
+
+    def packentry(self, entry, node, version, rev):
+        p = indexformatv2_pack(*entry)
+        if rev == 0:
+            p = versionformat_pack(version) + p[4:]
+        return p
+
+
 NodemapRevlogIO = None
 
 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
@@ -647,6 +682,8 @@ 
         self._io = revlogio()
         if self.version == REVLOGV0:
             self._io = revlogoldio()
+        elif fmt == REVLOGV2:
+            self._io = revlogv2io()
         elif devel_nodemap:
             self._io = NodemapRevlogIO()
         elif use_rust_index:
@@ -2318,7 +2355,15 @@ 
             p1r,
             p2r,
             node,
+            0,
+            0,
+            0,
+            0,
         )
+
+        if self.version & 0xFFFF != REVLOGV2:
+            e = e[:8]
+
         self.index.append(e)
 
         entry = self._io.packentry(e, self.node, self.version, curr)
diff --git a/mercurial/requirements.py b/mercurial/requirements.py
--- a/mercurial/requirements.py
+++ b/mercurial/requirements.py
@@ -23,7 +23,7 @@ 
 
 # Increment the sub-version when the revlog v2 format changes to lock out old
 # clients.
-REVLOGV2_REQUIREMENT = b'exp-revlogv2.1'
+REVLOGV2_REQUIREMENT = b'exp-revlogv2.2'
 
 # A repository with the sparserevlog feature will have delta chains that
 # can spread over a larger span. Sparse reading cuts these large spans into
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -32,14 +32,6 @@ 
     # x is a tuple
     return x
 
-
-indexformatng = b">Qiiiiii20s12x"
-indexfirst = struct.calcsize(b'Q')
-sizeint = struct.calcsize(b'i')
-indexsize = struct.calcsize(indexformatng)
-nullitem = (0, 0, 0, -1, -1, -1, -1, nullid)
-
-
 def gettype(q):
     return int(q & 0xFFFF)
 
@@ -49,6 +41,12 @@ 
 
 
 class BaseIndexObject(object):
+    indexformat = b">Qiiiiii20s12x"
+    indexfirst = struct.calcsize(b'Q')
+    sizeint = struct.calcsize(b'i')
+    indexsize = struct.calcsize(indexformat)
+    nullitem = (0, 0, 0, -1, -1, -1, -1, nullid)
+
     @property
     def nodemap(self):
         msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
@@ -94,7 +92,7 @@ 
     def append(self, tup):
         if '_nodemap' in vars(self):
             self._nodemap[tup[7]] = len(self)
-        data = _pack(indexformatng, *tup)
+        data = _pack(self.indexformat, *tup)
         self._extra.append(data)
 
     def _check_index(self, i):
@@ -105,14 +103,14 @@ 
 
     def __getitem__(self, i):
         if i == -1:
-            return nullitem
+            return self.nullitem
         self._check_index(i)
         if i >= self._lgt:
             data = self._extra[i - self._lgt]
         else:
             index = self._calculate_index(i)
-            data = self._data[index : index + indexsize]
-        r = _unpack(indexformatng, data)
+            data = self._data[index : index + self.indexsize]
+        r = _unpack(self.indexformat, data)
         if self._lgt and i == 0:
             r = (offset_type(0, gettype(r[0])),) + r[1:]
         return r
@@ -120,13 +118,13 @@ 
 
 class IndexObject(BaseIndexObject):
     def __init__(self, data):
-        assert len(data) % indexsize == 0
+        assert len(data) % self.indexsize == 0
         self._data = data
-        self._lgt = len(data) // indexsize
+        self._lgt = len(data) // self.indexsize
         self._extra = []
 
     def _calculate_index(self, i):
-        return i * indexsize
+        return i * self.indexsize
 
     def __delitem__(self, i):
         if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
@@ -135,7 +133,7 @@ 
         self._check_index(i)
         self._stripnodes(i)
         if i < self._lgt:
-            self._data = self._data[: i * indexsize]
+            self._data = self._data[: i * self.indexsize]
             self._lgt = i
             self._extra = []
         else:
@@ -198,14 +196,20 @@ 
         if lgt is not None:
             self._offsets = [0] * lgt
         count = 0
-        while off <= len(self._data) - indexsize:
+        while off <= len(self._data) - self.indexsize:
             (s,) = struct.unpack(
-                b'>i', self._data[off + indexfirst : off + sizeint + indexfirst]
+                b'>i',
+                self._data[
+                    off
+                    + self.indexfirst : off
+                    + self.sizeint
+                    + self.indexfirst
+                ],
             )
             if lgt is not None:
                 self._offsets[count] = off
             count += 1
-            off += indexsize + s
+            off += self.indexsize + s
         if off != len(self._data):
             raise ValueError(b"corrupted data")
         return count
@@ -227,11 +231,54 @@ 
         return self._offsets[i]
 
 
-def parse_index2(data, inline):
+def parse_index2(data, inline, revlogv2=False):
     if not inline:
-        return IndexObject(data), None
-    return InlinedIndexObject(data, inline), (0, data)
+        cls = IndexObject2 if revlogv2 else IndexObject
+        return cls(data), None
+    cls = InlinedIndexObject2 if revlogv2 else InlinedIndexObject
+    return cls(data, inline), (0, data)
+
+
+class Index2Mixin(object):
+    indexformat = b">Qiiiiii20s12xQiQi8x"  # unpacks to 12 fields
+    indexfirst = struct.calcsize(b'Q')
+    sizeint = struct.calcsize(b'i')
+    indexsize = struct.calcsize(indexformat)
+    assert indexsize == 96, indexsize
+    nullitem = (0, 0, 0, -1, -1, -1, -1, nullid, 0, 0, 0, 0)  # 12 fields
+
+
+class IndexObject2(Index2Mixin, IndexObject):
+    pass
+
 
+class InlinedIndexObject2(Index2Mixin, InlinedIndexObject):
+    def _inline_scan(self, lgt):
+        off = 0
+        if lgt is not None:
+            self._offsets = [0] * lgt
+        count = 0
+        while off <= len(self._data) - self.indexsize:
+            (data_size,) = struct.unpack(
+                b'>i',
+                self._data[
+                    off
+                    + self.indexfirst : off
+                    + self.sizeint
+                    + self.indexfirst
+                ],
+            )
+            start = off + self.indexsize - self.sizeint
+            (side_data_size,) = struct.unpack(
+                b'>i', self._data[start : off + self.indexsize]
+            )
+            if lgt is not None:
+                self._offsets[count] = off
+            count += 1
+            off += self.indexsize + data_size + side_data_size
+        if off != len(self._data):
+            raise ValueError(b"corrupted data")
+        return count
 
 def parse_index_devel_nodemap(data, inline):
     """like parse_index2, but alway return a PersistentNodeMapIndexObject"""
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py
--- a/mercurial/localrepo.py
+++ b/mercurial/localrepo.py
@@ -3636,11 +3636,11 @@ 
         # effectively locks out old clients and prevents them from
         # mucking with a repo in an unknown format.
         #
-        # The revlog header has version 2, which won't be recognized by
+        # The revlog header has version 65535, which won't be recognized by
         # such old clients.
         hgvfs.append(
             b'00changelog.i',
-            b'\0\0\0\2 dummy changelog to prevent using the old repo '
+            b'\0\0\xFF\xFF dummy changelog to prevent using the old repo '
             b'layout',
         )