From patchwork Sun Jun 4 23:59:27 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [15,of,22] obsstore: add caching for reading a subset of markers From: Jun Wu X-Patchwork-Id: 21192 Message-Id: <0843176578415e364704.1496620767@x1c> To: Date: Sun, 4 Jun 2017 16:59:27 -0700 # HG changeset patch # User Jun Wu # Date 1496602237 25200 # Sun Jun 04 11:50:37 2017 -0700 # Node ID 0843176578415e36470485ac48056952e460efc1 # Parent 31f6eafaf9ce5d4ac95ce289735b8f4ae692e4ae # Available At https://bitbucket.org/quark-zju/hg-draft # hg pull https://bitbucket.org/quark-zju/hg-draft -r 084317657841 obsstore: add caching for reading a subset of markers Previously, we either read all markers and cache them, or we read none. This patch allows us to just read a subset of markers and cache them. For example, "read markers 2, 3, 4, 5" want to read overlapped areas. The new code will make sure markers in the overlapping area are only parsed once. raw obsstore ..................... legend: read markers 1 ========= . raw marker data read markers 2 ====--------- = parse needed read markers 3 ----- - cache hit raw obsstore .....................++++++ + appended marker data read markers 4 -----------====== read markers 5 ---------- read markers 6 ===----------------- We need to read obsstore._data[offset:] to incrementally build indexes. A side effect of this patch is offsets are being read and returned so it in theory slows down the code a bit. However, the impact seems negligible: "hg log -r ." on hg-committed was 1.045s before and 1.050s after (best of 40 runs). Besides, the slow-down is temporary. Follow-ups will add proper indexes to greatly improve the perf. diff --git a/mercurial/obsolete.py b/mercurial/obsolete.py --- a/mercurial/obsolete.py +++ b/mercurial/obsolete.py @@ -70,4 +70,5 @@ comment associated with each format for from __future__ import absolute_import +import bisect import errno import struct @@ -548,4 +549,62 @@ class markerindex(dict): self.sourceoftruthsize = len(allmarkers) +class markerreader(object): + """read a range of markers with proper caching""" + + def __init__(self, obsstore): + self._obsstore = weakref.proxy(obsstore) + self._markers = [] + self._offsets = [] + self._lastlen = 0 + + def read(self, offset=0): + """parse obsstore._data[offset:], return (markers, offsets)""" + # data: unparsed.left + parsed + unparsed.right + # ^^^^^^ + # covered by self._markers and self._offsets + + data = self._obsstore._data + + # simple case: offset exceeding data size + if offset >= len(data): + return [], [] + offsets = self._offsets + markers = self._markers + + # simple case: no "parsed". load whatever the caller wants + if not offsets: + _version, (newmarkers, newoffsets) = _readmarkers( + data, offset, withoffsets=True) + markers[:] = newmarkers + offsets[:] = newoffsets + self._lastlen = len(data) + return markers, offsets + + # extend "parsed" to include "unparsed.right" + if len(data) > self._lastlen: + _version, (newmarkers, newoffsets) = _readmarkers( + data, self._lastlen, withoffsets=True) + markers.extend(newmarkers) + offsets.extend(newoffsets) + self._lastlen = len(data) + + # data: unparsed.left + parsed + # ^ offset + # if offset is in unparsed area, extend "parsed" to include it + offset = max(1, offset) # skip 1-byte version + if offset < offsets[0]: + _version, (newmarkers, newoffsets) = _readmarkers( + data, offset, offsets[0], withoffsets=True) + markers[0:0] = newmarkers + offsets[0:0] = newoffsets + + # fast path: return directly + if offsets[0] == offset: + return markers, offsets + + # offset should be in "parsed" area now + i = bisect.bisect_left(offsets, offset) + return markers[i:], offsets[i:] + def _checkinvalidmarkers(markers): """search for marker with invalid data and raise error if needed @@ -697,4 +756,12 @@ class obsstore(object): @propertycache + def _markerreader(self): + return markerreader(self) + + def _readmarkers(self, offset=0): + """return (markers, offsets)""" + return self._markerreader.read(offset) + + @propertycache def _version(self): if len(self._data) >= 1: @@ -705,8 +772,5 @@ class obsstore(object): @propertycache def _all(self): - data = self._data - if not data: - return [] - self._version, markers = _readmarkers(data) + markers, _offsets = self._readmarkers() _checkinvalidmarkers(markers) return markers