Patchwork D7834: nodemap: have some python code writing a nodemap in persistent binary form

login
register
mail settings
Submitter phabricator
Date Feb. 2, 2020, 8:18 a.m.
Message ID <99e3d6966f56da181682180b620fabce@localhost.localdomain>
Download mbox | patch
Permalink /patch/44879/
State Not Applicable
Headers show

Comments

phabricator - Feb. 2, 2020, 8:18 a.m.
marmoute added a comment.
marmoute updated this revision to Diff 19824.


  small doc update on .#s[1]

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7834?vs=19779&id=19824

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7834/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7834

AFFECTED FILES
  mercurial/debugcommands.py
  mercurial/revlogutils/nodemap.py
  tests/test-completion.t
  tests/test-help.t
  tests/test-persistent-nodemap.t

CHANGE DETAILS




To: marmoute, #hg-reviewers, martinvonz
Cc: martinvonz, mjpieters, mercurial-devel

Patch

diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t
new file mode 100644
--- /dev/null
+++ b/tests/test-persistent-nodemap.t
@@ -0,0 +1,26 @@ 
+===================================
+Test the persistent on-disk nodemap
+===================================
+
+
+  $ hg init test-repo
+  $ cd test-repo
+  $ hg debugbuilddag .+5000
+  $ hg debugnodemap --dump | f --sha256 --bytes=256 --hexdump --size
+  size=245760, sha256=bc400bf49f11e83bbd25630439feee6628a80a8602d2e38972eac44cc3efe10c
+  0000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+  0010: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+  0020: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+  0030: ff ff ff ff ff ff fa c2 ff ff ff ff ff ff ff ff |................|
+  0040: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+  0050: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+  0060: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ed b3 |................|
+  0070: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+  0080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ee 34 |...............4|
+  0090: 00 00 00 00 00 00 00 00 ff ff ff ff ff ff ff ff |................|
+  00a0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+  00b0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+  00c0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+  00d0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+  00e0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
+  00f0: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff |................|
diff --git a/tests/test-help.t b/tests/test-help.t
--- a/tests/test-help.t
+++ b/tests/test-help.t
@@ -1017,6 +1017,7 @@ 
                  print merge state
    debugnamecomplete
                  complete "names" - tags, open branch names, bookmark names
+   debugnodemap  write and inspect on disk nodemap
    debugobsolete
                  create arbitrary obsolete marker
    debugoptADV   (no help text available)
diff --git a/tests/test-completion.t b/tests/test-completion.t
--- a/tests/test-completion.t
+++ b/tests/test-completion.t
@@ -107,6 +107,7 @@ 
   debugmanifestfulltextcache
   debugmergestate
   debugnamecomplete
+  debugnodemap
   debugobsolete
   debugp1copies
   debugp2copies
@@ -289,6 +290,7 @@ 
   debugmanifestfulltextcache: clear, add
   debugmergestate: 
   debugnamecomplete: 
+  debugnodemap: dump
   debugobsolete: flags, record-parents, rev, exclusive, index, delete, date, user, template
   debugp1copies: rev
   debugp2copies: rev
diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -7,9 +7,156 @@ 
 # GNU General Public License version 2 or any later version.
 
 from __future__ import absolute_import
-from .. import error
+
+import struct
+
+from .. import (
+    error,
+    node as nodemod,
+    pycompat,
+)
 
 
 class NodeMap(dict):
     def __missing__(self, x):
         raise error.RevlogError(b'unknown node: %s' % x)
+
+
+### Nodemap Trie
+#
+# This is a simple reference implementation to compute and persist a nodemap
+# trie. This reference implementation is write only. The python version of this
+# is not expected to be actually used, since it wont provide performance
+# improvement over existing non-persistent C implementation.
+#
+# The nodemap is persisted as Trie using 4bits-address/16-entries block. each
+# revision can be adressed using its node shortest prefix.
+#
+# The trie is stored as a sequence of block. Each block contains 16 entries
+# (signed 64bit integer, big endian). Each entry can be one of the following:
+#
+#  * value >=  0 -> index of sub-block
+#  * value == -1 -> no value
+#  * value <  -1 -> a revision value: rev = -(value+10)
+#
+# The implementation focus on simplicity, not on performance. A Rust
+# implementation should provide a efficient version of the same binary
+# persistence. This reference python implementation is never meant to be
+# extensively use in production.
+
+
+def persistent_data(index):
+    """return the persistent binary form for a nodemap for a given index
+    """
+    trie = _build_trie(index)
+    return _persist_trie(trie)
+
+
+S_BLOCK = struct.Struct(">" + ("q" * 16))
+
+NO_ENTRY = -1
+# rev 0 need to be -2 because 0 is used by block, -1 is a special value.
+REV_OFFSET = 2
+
+
+def _transform_rev(rev):
+    """Return the number used to represent the rev in the tree.
+
+    (or retrieve a rev number from such representation)
+
+    Note that this is an involution, a function equal to its inverse (i.e.
+    which gives the identity when applied to itself).
+    """
+    return -(rev + REV_OFFSET)
+
+
+def _to_int(hex_digit):
+    """turn an hexadecimal digit into a proper integer"""
+    return int(hex_digit, 16)
+
+
+def _build_trie(index):
+    """build a nodemap trie
+
+    The nodemap stores revision number for each unique prefix.
+
+    Each block is a dictionary with keys in `[0, 15]`. Values are either
+    another block or a revision number.
+    """
+    root = {}
+    for rev in range(len(index)):
+        hex = nodemod.hex(index[rev][7])
+        _insert_into_block(index, 0, root, rev, hex)
+    return root
+
+
+def _insert_into_block(index, level, block, current_rev, current_hex):
+    """insert a new revision in a block
+
+    index: the index we are adding revision for
+    level: the depth of the current block in the trie
+    block: the block currently being considered
+    current_rev: the revision number we are adding
+    current_hex: the hexadecimal representation of the of that revision
+    """
+    hex_digit = _to_int(current_hex[level : level + 1])
+    entry = block.get(hex_digit)
+    if entry is None:
+        # no entry, simply store the revision number
+        block[hex_digit] = current_rev
+    elif isinstance(entry, dict):
+        # need to recurse to an underlying block
+        _insert_into_block(index, level + 1, entry, current_rev, current_hex)
+    else:
+        # collision with a previously unique prefix, inserting new
+        # vertices to fit both entry.
+        other_hex = nodemod.hex(index[entry][7])
+        other_rev = entry
+        new = {}
+        block[hex_digit] = new
+        _insert_into_block(index, level + 1, new, other_rev, other_hex)
+        _insert_into_block(index, level + 1, new, current_rev, current_hex)
+
+
+def _persist_trie(root):
+    """turn a nodemap trie into persistent binary data
+
+    See `_build_trie` for nodemap trie structure"""
+    block_map = {}
+    chunks = []
+    for tn in _walk_trie(root):
+        block_map[id(tn)] = len(chunks)
+        chunks.append(_persist_block(tn, block_map))
+    return b''.join(chunks)
+
+
+def _walk_trie(block):
+    """yield all the block in a trie
+
+    Children blocks are always yield before their parent block.
+    """
+    for (_, item) in sorted(block.items()):
+        if isinstance(item, dict):
+            for sub_block in _walk_trie(item):
+                yield sub_block
+    yield block
+
+
+def _persist_block(block_node, block_map):
+    """produce persistent binary data for a single block
+
+    Children block are assumed to be already persisted and present in
+    block_map.
+    """
+    data = tuple(_to_value(block_node.get(i), block_map) for i in range(16))
+    return S_BLOCK.pack(*data)
+
+
+def _to_value(item, block_map):
+    """persist any value as an integer"""
+    if item is None:
+        return NO_ENTRY
+    elif isinstance(item, dict):
+        return block_map[id(item)]
+    else:
+        return _transform_rev(item)
diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py
--- a/mercurial/debugcommands.py
+++ b/mercurial/debugcommands.py
@@ -94,7 +94,10 @@ 
     stringutil,
 )
 
-from .revlogutils import deltas as deltautil
+from .revlogutils import (
+    deltas as deltautil,
+    nodemap,
+)
 
 release = lockmod.release
 
@@ -2081,6 +2084,20 @@ 
 
 
 @command(
+    b'debugnodemap',
+    [(b'', b'dump', False, _(b'write persistent binary nodemap on stdin'))],
+)
+def debugnodemap(ui, repo, **opts):
+    """write and inspect on disk nodemap
+    """
+    if opts['dump']:
+        unfi = repo.unfiltered()
+        cl = unfi.changelog
+        data = nodemap.persistent_data(cl.index)
+        ui.write(data)
+
+
+@command(
     b'debugobsolete',
     [
         (b'', b'flags', 0, _(b'markers flag')),