Patchwork [3,of,6,RFC] obsstore: do not load all markers for precursors and successors

login
register
mail settings
Submitter via Mercurial-devel
Date May 22, 2017, 1:42 a.m.
Message ID <CAESOdVCrFExVZ_5krHuQj2ZqM7EAsswsmybW6HyBQ7rURVFQ0w@mail.gmail.com>
Download mbox | patch
Permalink /patch/20814/
State Not Applicable
Headers show

Comments

via Mercurial-devel - May 22, 2017, 1:42 a.m.
On May 21, 2017 6:31 PM, "Jun Wu" <quark@fb.com> wrote:

# HG changeset patch
# User Jun Wu <quark@fb.com>
# Date 1495397887 25200
#      Sun May 21 13:18:07 2017 -0700
# Node ID 929ea30d953466dc04f0e5d8ca8cd9ed06ca2310
# Parent  1a100daf74d0ea3059f5b841c7c6e9ff46388ad9
# Available At https://bitbucket.org/quark-zju/hg-draft
#              hg pull https://bitbucket.org/quark-zju/hg-draft -r 929ea30d9534
obsstore: do not load all markers for precursors and successors

Use radixlink index to avoid loading all markers.


Why? I'm assuming because it's faster, but where is the timing data?

Same comment for the next few patches.


         return attr in self.__dict__

Patch

diff --git a/mercurial/obsolete.py b/mercurial/obsolete.py
--- a/mercurial/obsolete.py
+++ b/mercurial/obsolete.py
@@ -79,4 +79,5 @@  from . import (
     phases,
     policy,
+    radixlink,
     util,
 )
@@ -440,10 +441,12 @@  formats = {_fm0version: (_fm0readmarkers
            _fm1version: (_fm1readmarkers, _fm1encodeonemarker)}

+def _readmarkerversion(data):
+    return _unpack('>B', data[0:1])[0]
+
 @util.nogc
 def _readmarkers(data):
     """Read and enumerate markers from raw data"""
-    off = 0
-    diskversion = _unpack('>B', data[off:off + 1])[0]
-    off += 1
+    diskversion = _readmarkerversion(data)
+    off = 1
     if diskversion not in formats:
         raise error.Abort(_('parsing obsolete marker: unknown version %r')
@@ -460,4 +463,68 @@  def encodemarkers(markers, addheader=Fal
         yield encodeone(marker)

+class lazymarkerdict(object):
+    def __init__(self, data, vfs, name, nodefunc):
+        self.nodefunc = nodefunc
+        # FIXME: put the file in repo.vfs('cache')
+        self.vfs = vfs
+        self.name = name
+        self.data = data
+        self._cache = {} # {node: [rawmarker]}
+        self._update()
+
+    @util.propertycache
+    def _radixlink(self):
+        return radixlink.radixlink(self.vfs, 'obs-%s' % self.name)
+
+    def _update(self):
+        """incrementally update radixlink cache"""
+        nodefunc = self.nodefunc
+
+        radl = self._radixlink
+        off = radl.truthoffset or 1 # skip 1 byte version in obsstore
+
+        data = self.data
+        stop = len(data) - _fm1fsize
+
+        for pos, mark in parsers.fm1readmarkerswithoffset(data, off, stop):
+            for n in nodefunc(mark):
+                radl.insert(n, pos)
+
+        radl.truthoffset = len(data)
+        radl.flush()
+
+    def get(self, node, default=None):
+        if node in self._cache:
+            result = self._cache[node]
+        else:
+            result = self._calculate(node)
+        return result or default
+
+    def __getitem__(self, node):
+        v = self.get(node)
+        if v is None:
+            raise KeyError(node)
+        return v
+
+    def __contains__(self, node):
+        return self.get(node) is not None
+
+    def _calculate(self, node):
+        """fill cache for given node from radixlink"""
+        markers = self._cache.setdefault(node, set())
+        offsets = self._radixlink.get(node)
+        data = self.data
+        for pos in offsets:
+            mark = parsers.fm1readmarkers(data, pos, pos + 1)[0]
+            markers.add(mark)
+        return markers
+
+    # compatibility with the vanilla set interface
+
+    def setdefault(self, node, default):
+        assert not default
+        if node in self._cache:
+            return self._cache[node]
+        return self._calculate(node)

 class marker(object):
@@ -661,6 +728,10 @@  class obsstore(object):

     @propertycache
+    def _data(self):
+        return self.svfs.tryread('obsstore')
+
+    @propertycache
     def _all(self):
-        data = self.svfs.tryread('obsstore')
+        data = self._data
         if not data:
             return []
@@ -672,4 +743,8 @@  class obsstore(object):
     @propertycache
     def successors(self):
+        if self._uselazydict:
+            # FIXME: use vfs instead of svfs
+            return lazymarkerdict(self._data, self.svfs, 'successors',
+                                  lambda m: [m[0]])
         successors = {}
         _addsuccessors(successors, self._all)
@@ -678,4 +753,7 @@  class obsstore(object):
     @propertycache
     def precursors(self):
+        if self._uselazydict:
+            return lazymarkerdict(self._data, self.svfs, 'precursors',
+                                  lambda m: m[1])
         precursors = {}
         _addprecursors(precursors, self._all)
@@ -688,4 +766,11 @@  class obsstore(object):
         return children

+    @propertycache
+    def _uselazydict(self):
+        # lazydict only supports marker version 1 and is only available in C
+        isfm1 = (_readmarkerversion(self._data) == _fm1version)
+        isc = util.safehasattr(parsers, 'fm1readmarkerswithoffset')
+        return isfm1 and isc
+
     def _cached(self, attr):