From patchwork Thu Oct 19 08:53:31 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [V2] sparse-read: ignore trailing empty revs in each read chunk From: Paul Morelle X-Patchwork-Id: 25233 Message-Id: To: mercurial-devel@mercurial-scm.org Date: Thu, 19 Oct 2017 10:53:31 +0200 # HG changeset patch # User Paul Morelle # Date 1508333299 -7200 # Wed Oct 18 15:28:19 2017 +0200 # Node ID ef3d9978b7daf5c2152f624b10fffb13425b06db # Parent fb2574bd73a9c0d9a7a88407b20fdabc9213bc20 # EXP-Topic optimized-read # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r ef3d9978b7da sparse-read: ignore trailing empty revs in each read chunk An empty entry in the revlog may happen for two reasons: - when the file is empty, and the revlog stores a snapshot; - when there is a merge and both parents were identical. `hg debugindex -m | awk '$3=="0"{print}' | wc -l` gives 1917 such entries in my clone of pypy, and 113 in my clone of mercurial. These empty revisions may be located at the end of a sparse chain, and in some special cases may lead to reading relatively large amounts of data for nothing. diff -r fb2574bd73a9 -r ef3d9978b7da mercurial/revlog.py --- a/mercurial/revlog.py Wed Oct 18 09:07:48 2017 +0200 +++ b/mercurial/revlog.py Wed Oct 18 15:28:19 2017 +0200 @@ -162,6 +162,20 @@ s.update(text) return s.digest() +def _trimchunk(revlog, revs, startidx, endidx=None): + """returns revs[startidx:endidx] without empty trailing revs + """ + length = revlog.length + + if endidx is None: + endidx = len(revs) + + # Trim empty revs at the end, but never the very first revision of a chain + while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0: + endidx -= 1 + + return revs[startidx:endidx] + def _slicechunk(revlog, revs): """slice revs to reduce the amount of unrelated data to be read from disk. 
@@ -194,6 +208,10 @@ revstart = start(rev) revlen = length(rev) + # Skip empty revisions to form larger holes + if revlen == 0: + continue + if prevend is not None: gapsize = revstart - prevend # only consider holes that are large enough @@ -222,9 +240,16 @@ previdx = 0 while indicesheap: idx = heapq.heappop(indicesheap) - yield revs[previdx:idx] + + chunk = _trimchunk(revlog, revs, previdx, idx) + if chunk: + yield chunk + previdx = idx - yield revs[previdx:] + + chunk = _trimchunk(revlog, revs, previdx) + if chunk: + yield chunk # index v0: # 4 bytes: offset