Patchwork D7838: nodemap: use an intermediate "docket" file to carry small metadata

login
register
mail settings
Submitter phabricator
Date Jan. 11, 2020, 5:04 p.m.
Message ID <differential-rev-PHID-DREV-sndzz7ao5attwjtwyjwf-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/44261/
State New
Headers show

Comments

phabricator - Jan. 11, 2020, 5:04 p.m.
marmoute created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  This intermediate file will make mmapping, transactions and content validation
  easier. (Most of this usefulness will arrive gradually in later changesets.) See
  the in-code comments for details.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D7838

AFFECTED FILES
  mercurial/revlogutils/nodemap.py
  tests/test-persistent-nodemap.t

CHANGE DETAILS




To: marmoute, #hg-reviewers
Cc: mercurial-devel

Patch

diff --git a/tests/test-persistent-nodemap.t b/tests/test-persistent-nodemap.t
--- a/tests/test-persistent-nodemap.t
+++ b/tests/test-persistent-nodemap.t
@@ -10,6 +10,8 @@ 
   > exp-persistent-nodemap=yes
   > EOF
   $ hg debugbuilddag .+5000
+  $ f --size .hg/store/00changelog.n
+  .hg/store/00changelog.n: size=18
   $ hg debugnodemap --dump-new | f --sha256 --size
   size=245760, sha256=5dbe62ab98a26668b544063d4d674ac4452ba903ee8895c52fd21d9bbd771e09
   $ hg debugnodemap --dump-disk | f --sha256 --bytes=256 --hexdump --size
diff --git a/mercurial/revlogutils/nodemap.py b/mercurial/revlogutils/nodemap.py
--- a/mercurial/revlogutils/nodemap.py
+++ b/mercurial/revlogutils/nodemap.py
@@ -8,6 +8,7 @@ 
 
 from __future__ import absolute_import
 
+import os
 import struct
 
 from .. import (
@@ -25,7 +26,20 @@ 
     """read the nodemap for a revlog from disk"""
     if revlog.nodemap_file is None:
         return None
-    return revlog.opener.tryread(revlog.nodemap_file)
+    pdata = revlog.opener.tryread(revlog.nodemap_file)
+    if not pdata:
+        return None
+    offset = 0
+    (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
+    if version != ONDISK_VERSION:
+        return None
+    offset += S_VERSION.size
+    (uuid_size,) = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
+    offset += S_HEADER.size
+    uid = pdata[offset : offset + uuid_size]
+
+    filename = _rawdata_filepath(revlog, uid)
+    return revlog.opener.tryread(filename)
 
 
 def setup_persistent_nodemap(tr, revlog):
@@ -50,13 +64,69 @@ 
         msg = "calling persist nodemap on a revlog without the feature enableb"
         raise error.ProgrammingError(msg)
     data = persistent_data(revlog.index)
+    uid = _make_uid()
+    datafile = _rawdata_filepath(revlog, uid)
+    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
+    # store vfs
+    with revlog.opener(datafile, 'w') as fd:
+        fd.write(data)
     # EXP-TODO: if this is a cache, this should use a cache vfs, not a
     # store vfs
-    with revlog.opener(revlog.nodemap_file, 'w') as f:
-        f.write(data)
+    with revlog.opener(revlog.nodemap_file, 'w', atomictemp=True) as fp:
+        fp.write(_serialize_docket(uid))
     # EXP-TODO: if the transaction abort, we should remove the new data and
-    # reinstall the old one. (This will be simpler when the file format get a
-    # bit more advanced)
+    # reinstall the old one.
+
+
+### Nodemap docket file
+#
+# The nodemap data are stored on disk using 2 files:
+#
+# * a raw data file containing a serialized nodemap
+#   (see `Nodemap Trie` section)
+#
+# * a small "docket" file containing metadata
+#
+# While the nodemap data can be multiple tens of megabytes, the "docket" is
+# small, so it is easy to update it atomically or to duplicate its content
+# during a transaction.
+#
+# Multiple raw data can exist at the same time (the currently valid one and a
+# new one being used by an in progress transaction). To accommodate this, the
+# filename hosting the raw data has a variable part. The exact filename is
+# specified inside the "docket" file.
+#
+# The docket file contains information to find, qualify and validate the raw
+# data. Its content is currently very light, but it will expand as the on disk
+# nodemap gains the necessary features to be used in production.
+
+# version 0 is experimental, no BC guarantee, do not use outside of tests.
+ONDISK_VERSION = 0
+
+S_VERSION = struct.Struct(">B")
+S_HEADER = struct.Struct(">B")
+
+ID_SIZE = 8
+
+
+def _make_uid():
+    """return a new unique identifier.
+
+    The identifier is random and composed of ascii characters."""
+    return nodemod.hex(os.urandom(ID_SIZE))
+
+
+def _serialize_docket(uid):
+    data = []
+    data.append(S_VERSION.pack(ONDISK_VERSION))
+    data.append(S_HEADER.pack(len(uid)))
+    data.append(uid)
+    return b''.join(data)
+
+
+def _rawdata_filepath(revlog, uid):
+    prefix = revlog.nodemap_file[:-2]
+    return b"%s-%s.nd" % (prefix, uid)
 
 
 ### Nodemap Trie