Patchwork [4,of,4,V2] obsolete: introduce a new binary encoding for obsmarkers (version 1)

login
register
mail settings
Submitter Pierre-Yves David
Date Sept. 30, 2014, 6:01 p.m.
Message ID <c7bf019925fe5c75d1dd.1412100085@marginatus.alto.octopoid.net>
Download mbox | patch
Permalink /patch/6041/
State Superseded
Commit 974389427e5fbe9b1d06333aaef61f2f24649f17
Headers show

Comments

Pierre-Yves David - Sept. 30, 2014, 6:01 p.m.
# HG changeset patch
# User Pierre-Yves David <pierre-yves.david@fb.com>
# Date 1410372923 -7200
#      Wed Sep 10 20:15:23 2014 +0200
# Node ID c7bf019925fe5c75d1ddb64a22fede495d079c6d
# Parent  aeae860293266be3c53587b69961d2aa55ea11c8
obsolete: introduce a new binary encoding for obsmarkers (version 1)

This new encoding explicitly stores the date and parents, allowing significantly
faster marker decoding. See inline documentation for details.

This format is not yet used to store markers on disk, but it will be used in
bundle2 exchange if both sides support it. Support for the on-disk format is
coming in another changeset.

Patch

diff --git a/mercurial/obsolete.py b/mercurial/obsolete.py
--- a/mercurial/obsolete.py
+++ b/mercurial/obsolete.py
@@ -206,13 +206,123 @@  def _fm0encodeonemarker(marker):
     format = _fm0fixed + (_fm0node * nbsuc)
     data = [nbsuc, len(metadata), flags, pre]
     data.extend(sucs)
     return _pack(format, *data) + metadata
 
+## Parsing and writing of version "1"
+#
+# The header is followed by the markers. Each marker is made of:
+#
+# - 1 unsigned integer: total size of the marker (including this field)
+#
+# - 1 64 bits float: date in seconds since epoch
+#
+# - 1 16 bits integer: timezone offset in minutes
+#
+# - 1 unsigned byte: number of successors "N", can be zero.
+#
+# - 2 bytes: a bit field. It is reserved for flags used in common
+#   obsolete marker operations, to avoid repeated decoding of metadata
+#   entries.
+#
+#   The highest two bits are used to encode information about the number (P)
+#   of precursor's parents stored in the marker:
+#
+#     0: parents data stored but no parent,
+#     1: one parent stored,
+#     2: two parents stored,
+#     3: no parent data stored
+#
+# - 20 bytes: precursor changeset identifier.
+#
+# - N*20 bytes: successors changesets identifiers.
+#
+# - P*20 bytes: parents of the precursors changesets.
+#
+# - remaining bytes: metadata as a sequence of nul-terminated strings. Each
+#   string contains a key and a value, separated by a colon ':', without
+#   additional encoding. Keys cannot contain '\0' or ':' and values cannot
+#   contain '\0'.
+_fm1version = 1
+_fm1fixed = '>IdhBH20s'
+_fm1node = '20s'
+_fm1fsize = struct.calcsize(_fm1fixed)
+_fm1fnodesize = struct.calcsize(_fm1node)
+_fm1parentnone = 3
+_fm1parentshift = 14
+_fm1parentmask = (_fm1parentnone << _fm1parentshift)
+
+def _fm1readmarkers(data, off=0):
+    # Loop on markers
+    l = len(data)
+    while off + _fm1fsize <= l:
+        initialoff = off
+        # read fixed part
+        cur = data[off:off + _fm1fsize]
+        off += _fm1fsize
+        ttsize, seconds, tz, nbsuc, flags, prec = _unpack(_fm1fixed, cur)
+        # extract the number of parent information
+        nbpar = (flags & _fm1parentmask) >> _fm1parentshift
+        flags &= ~ _fm1parentmask
+        if nbpar == _fm1parentnone:
+            nbpar = None
+        # build the date tuple (upgrade tz minute to second)
+        date = (seconds, tz * 60)
+        # read replacement
+        sucs = ()
+        if nbsuc:
+            s = (_fm1fnodesize * nbsuc)
+            cur = data[off:off + s]
+            sucs = _unpack(_fm1node * nbsuc, cur)
+            off += s
+        # read parents
+        if nbpar is None:
+            parents = None
+        elif nbpar == 0:
+            parents = ()
+        elif nbpar:  # neither None nor zero
+            s = (_fm1fnodesize * nbpar)
+            cur = data[off:off + s]
+            parents = _unpack(_fm1node * nbpar, cur)
+            off += s
+        # read metadata
+        # (metadata will be decoded on demand)
+        #
+        # (size of metadata is total size minus all data already read)
+        mdsize = ttsize - (off - initialoff)
+        metadata = data[off:off + mdsize]
+        if len(metadata) != mdsize:
+            raise util.Abort(_('parsing obsolete marker: metadata is too '
+                               'short, %d bytes expected, got %d')
+                             % (mdsize, len(metadata)))
+        off += mdsize
+
+        yield (prec, sucs, flags, metadata, date, parents)
+
+def _fm1encodeonemarker(marker):
+    pre, sucs, flags, metadata, date, parents = marker
+    nbsuc = len(sucs)
+    nbextranodes = nbsuc
+    if parents is None:
+        nbpar = _fm1parentnone
+    else:
+        nbpar = len(parents)
+        nbextranodes += nbpar
+    flags |= (nbpar << _fm1parentshift)
+    format = _fm1fixed + (_fm1node * nbextranodes)
+    totalsize = struct.calcsize(format) + len(metadata)
+    # tz is stored in minutes so we divide by 60
+    data = [totalsize, date[0], date[1]//60, nbsuc, flags, pre]
+    data.extend(sucs)
+    if parents is not None:
+        data.extend(parents)
+    return _pack(format, *data) + metadata
+
 # mapping to read/write various marker formats
 # <version> -> (decoder, encoder)
-formats = {_fm0version: (_fm0readmarkers, _fm0encodeonemarker)}
+formats = {_fm0version: (_fm0readmarkers, _fm0encodeonemarker),
+           _fm1version: (_fm1readmarkers, _fm1encodeonemarker)}
 
 def _readmarkers(data):
     """Read and enumerate markers from raw data"""
     off = 0
     diskversion = _unpack('>B', data[off:off + 1])[0]