From patchwork Mon Dec 21 04:04:12 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [4,of,6] revlog: refactor delta chain computation into own function From: Gregory Szorc X-Patchwork-Id: 12197 Message-Id: To: mercurial-devel@selenic.com Date: Sun, 20 Dec 2015 20:04:12 -0800 # HG changeset patch # User Gregory Szorc # Date 1450666565 28800 # Sun Dec 20 18:56:05 2015 -0800 # Node ID db6108f5d52fd9422b0136d30bf93faa06a6b1d0 # Parent 972ebaece5fa6718cbf844100f983292524588ca revlog: refactor delta chain computation into own function This code is already written in multiple locations. While this code needs to be fast and extracting it to its own function adds overhead, code paths reading delta chains typically read, decompress, and do binary patching on revlog data from the delta chain. This other work (especially zlib decompression) almost certainly accounts for a lot more time than the overhead of introducing a Python function call. So I'm not worried about the performance impact of this change. diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -425,16 +425,51 @@ class revlog(object): else: # Add text length of base since decompressing that also takes # work. For cache hits the length is already included. compresseddeltalen += e[1] r = (clen, compresseddeltalen) chaininfocache[rev] = r return r + def _deltachain(self, rev, stoprev=None): + """Obtain the delta chain for a revision. + + ``stoprev`` specifies a revision to stop at. If not specified, we + stop at the base of the chain. + + Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of + revs in ascending order and ``stopped`` is a bool indicating whether + ``stoprev`` was hit. + """ + chain = [] + + # Alias to prevent attribute lookup in tight loop. + index = self.index + generaldelta = self._generaldelta + + iterrev = rev + e = index[iterrev] + while iterrev != e[3] and iterrev != stoprev: + chain.append(iterrev) + if generaldelta: + iterrev = e[3] + else: + iterrev -= 1 + e = index[iterrev] + + if iterrev == stoprev: + stopped = True + else: + chain.append(iterrev) + stopped = False + + chain.reverse() + return chain, stopped + def flags(self, rev): return self.index[rev][0] & 0xFFFF def rawsize(self, rev): """return the length of the uncompressed text for a given revision""" l = self.index[rev][2] if l >= 0: return l @@ -1155,36 +1190,19 @@ class revlog(object): if rev is None: rev = self.rev(node) # check rev flags if self.flags(rev) & ~REVIDX_KNOWN_FLAGS: raise RevlogError(_('incompatible revision flag %x') % (self.flags(rev) & ~REVIDX_KNOWN_FLAGS)) - # build delta chain - chain = [] - index = self.index # for performance - generaldelta = self._generaldelta - iterrev = rev - e = index[iterrev] - while iterrev != e[3] and iterrev != cachedrev: - chain.append(iterrev) - if generaldelta: - iterrev = e[3] - else: - iterrev -= 1 - e = index[iterrev] - - if iterrev == cachedrev: - # cache hit + chain, stopped = self._deltachain(rev, stoprev=cachedrev) + if stopped: text = self._cache[2] - else: - chain.append(iterrev) - chain.reverse() # drop cache to save memory self._cache = None bins = self._chunks(chain, df=_df) if text is None: text = str(bins[0]) bins = bins[1:]