Patchwork D7886: nodemap: introduce append-only incremental update of the persistent data

login
register
mail settings
Submitter phabricator
Date Feb. 11, 2020, 1:26 a.m.
Message ID <8083e1e160b5d8fb6f1e52b4ca6a95a3@localhost.localdomain>
Download mbox | patch
Permalink /patch/45137/
State Not Applicable
Headers show

Comments

phabricator - Feb. 11, 2020, 1:26 a.m.
Closed by commit rHG50ad851efd9b: nodemap: introduce append-only incremental update of the persistent data (authored by marmoute).
This revision was automatically updated to reflect the committed changes.
This revision was not accepted when it landed; it landed in state "Needs Review".

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7886?vs=19898&id=20119

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7886/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7886

AFFECTED FILES
  mercurial/pure/parsers.py
  mercurial/revlogutils/nodemap.py
  tests/test-persistent-nodemap.t

CHANGE DETAILS




To: marmoute, #hg-reviewers
Cc: martinvonz, mercurial-devel

Patch

diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t
--- a/tests/test-persistent-nodemap.t
+++ b/tests/test-persistent-nodemap.t
@@ -49,8 +49,19 @@ 
   $ hg ci -m 'foo'
   $ f --size .hg/store/00changelog.n
   .hg/store/00changelog.n: size=18
+
+(The pure code uses the debug code that performs incremental updates; the C code re-encodes from scratch)
+
+#if pure
+  $ f --sha256 .hg/store/00changelog-*.nd --size
+  .hg/store/00changelog-????????????????.nd: size=123072, sha256=136472751566c8198ff09e306a7d2f9bd18bd32298d614752b73da4d6df23340 (glob)
+
+#else
   $ f --sha256 .hg/store/00changelog-*.nd --size
   .hg/store/00changelog-????????????????.nd: size=122880, sha256=bfafebd751c4f6d116a76a37a1dee2a251747affe7efbcc4f4842ccc746d4db9 (glob)
+
+#endif
+
   $ hg debugnodemap --check
   revision in index:   5002
   revision in nodemap: 5002
diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -69,12 +69,41 @@ 
     if revlog.nodemap_file is None:
         msg = "calling persist nodemap on a revlog without the feature enableb"
         raise error.ProgrammingError(msg)
-    if util.safehasattr(revlog.index, "nodemap_data_all"):
-        data = revlog.index.nodemap_data_all()
+
+    can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
+    ondisk_docket = revlog._nodemap_docket
+
+    # first attempt an incremental update of the data
+    if can_incremental and ondisk_docket is not None:
+        target_docket = revlog._nodemap_docket.copy()
+        data = revlog.index.nodemap_data_incremental()
+        datafile = _rawdata_filepath(revlog, target_docket)
+        # EXP-TODO: if this is a cache, this should use a cache vfs, not a
+        # store vfs
+        with revlog.opener(datafile, b'a') as fd:
+            fd.write(data)
     else:
-        data = persistent_data(revlog.index)
-    target_docket = NodeMapDocket()
-    datafile = _rawdata_filepath(revlog, target_docket)
+        # otherwise fall back to a full new export
+        target_docket = NodeMapDocket()
+        datafile = _rawdata_filepath(revlog, target_docket)
+        if util.safehasattr(revlog.index, "nodemap_data_all"):
+            data = revlog.index.nodemap_data_all()
+        else:
+            data = persistent_data(revlog.index)
+        # EXP-TODO: if this is a cache, this should use a cache vfs, not a
+        # store vfs
+        with revlog.opener(datafile, b'w') as fd:
+            fd.write(data)
+    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
+    # store vfs
+    with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp:
+        fp.write(target_docket.serialize())
+    revlog._nodemap_docket = target_docket
+    # EXP-TODO: if the transaction aborts, we should remove the new data and
+    # reinstall the old one.
+
+    # search for old index file in all cases, some older process might have
+    # left one behind.
     olds = _other_rawdata_filepath(revlog, target_docket)
     if olds:
         realvfs = getattr(revlog, '_realopener', revlog.opener)
@@ -85,17 +114,6 @@ 
 
         callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
         tr.addpostclose(callback_id, cleanup)
-    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
-    # store vfs
-    with revlog.opener(datafile, b'w') as fd:
-        fd.write(data)
-    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
-    # store vfs
-    with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp:
-        fp.write(target_docket.serialize())
-    revlog._nodemap_docket = target_docket
-    # EXP-TODO: if the transaction abort, we should remove the new data and
-    # reinstall the old one.
 
 
 ### Nodemap docket file
@@ -208,6 +226,13 @@ 
     return _persist_trie(trie)
 
 
+def update_persistent_data(index, root, max_idx, last_rev):
+    """return the incremental update for persistent nodemap from a given index
+    """
+    trie = _update_trie(index, root, last_rev)
+    return _persist_trie(trie, existing_idx=max_idx)
+
+
 S_BLOCK = struct.Struct(">" + ("l" * 16))
 
 NO_ENTRY = -1
@@ -260,6 +285,14 @@ 
     return root
 
 
+def _update_trie(index, root, last_rev):
+    """insert all index revisions after `last_rev` into the trie `root`"""
+    for rev in range(last_rev + 1, len(index)):
+        hex = nodemod.hex(index[rev][7])
+        _insert_into_block(index, 0, root, rev, hex)
+    return root
+
+
 def _insert_into_block(index, level, block, current_rev, current_hex):
     """insert a new revision in a block
 
@@ -269,6 +302,8 @@ 
     current_rev: the revision number we are adding
     current_hex: the hexadecimal representation of the of that revision
     """
+    if block.ondisk_id is not None:
+        block.ondisk_id = None
     hex_digit = _to_int(current_hex[level : level + 1])
     entry = block.get(hex_digit)
     if entry is None:
@@ -288,15 +323,22 @@ 
         _insert_into_block(index, level + 1, new, current_rev, current_hex)
 
 
-def _persist_trie(root):
+def _persist_trie(root, existing_idx=None):
     """turn a nodemap trie into persistent binary data
 
     See `_build_trie` for nodemap trie structure"""
     block_map = {}
+    if existing_idx is not None:
+        base_idx = existing_idx + 1
+    else:
+        base_idx = 0
     chunks = []
     for tn in _walk_trie(root):
-        block_map[id(tn)] = len(chunks)
-        chunks.append(_persist_block(tn, block_map))
+        if tn.ondisk_id is not None:
+            block_map[id(tn)] = tn.ondisk_id
+        else:
+            block_map[id(tn)] = len(chunks) + base_idx
+            chunks.append(_persist_block(tn, block_map))
     return b''.join(chunks)
 
 
@@ -338,7 +380,7 @@ 
         msg = "nodemap data size is not a multiple of block size (%d): %d"
         raise error.Abort(msg % (S_BLOCK.size, len(data)))
     if not data:
-        return Block()
+        return Block(), None
     block_map = {}
     new_blocks = []
     for i in range(0, len(data), S_BLOCK.size):
@@ -356,7 +398,7 @@ 
                 b[idx] = block_map[v]
             else:
                 b[idx] = _transform_rev(v)
-    return block
+    return block, i // S_BLOCK.size
 
 
 # debug utility
@@ -366,7 +408,7 @@ 
     """verify that the provided nodemap data are valid for the given idex"""
     ret = 0
     ui.status((b"revision in index:   %d\n") % len(index))
-    root = parse_data(data)
+    root, __ = parse_data(data)
     all_revs = set(_all_revisions(root))
     ui.status((b"revision in nodemap: %d\n") % len(all_revs))
     for r in range(len(index)):
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -156,13 +156,31 @@ 
         index."""
         return nodemaputil.persistent_data(self)
 
+    def nodemap_data_incremental(self):
+        """Return bytes containing an incremental update to persistent nodemap
+
+        This contains the data for an append-only update of the data provided
+        in the last call to `update_nodemap_data`.
+        """
+        if self._nm_root is None:
+            return None
+        data = nodemaputil.update_persistent_data(
+            self, self._nm_root, self._nm_max_idx, self._nm_rev
+        )
+        self._nm_root = self._nm_max_idx = self._nm_rev = None
+        return data
+
     def update_nodemap_data(self, nm_data):
         """provide full blokc of persisted binary data for a nodemap
 
         The data are expected to come from disk. See `nodemap_data_all` for a
         produceur of such data."""
         if nm_data is not None:
-            nodemaputil.parse_data(nm_data)
+            self._nm_root, self._nm_max_idx = nodemaputil.parse_data(nm_data)
+            if self._nm_root:
+                self._nm_rev = len(self) - 1
+            else:
+                self._nm_root = self._nm_max_idx = self._nm_rev = None
 
 
 class InlinedIndexObject(BaseIndexObject):