Patchwork [3,of,4] revlog: use persistent file handles during addgroup

login
register
mail settings
Submitter Gregory Szorc
Date July 14, 2015, 8:51 p.m.
Message ID <3c74157a105dc9951477.1436907072@gps-mbp.local>
Download mbox | patch
Permalink /patch/9985/
State Changes Requested
Headers show

Comments

Gregory Szorc - July 14, 2015, 8:51 p.m.
# HG changeset patch
# User Gregory Szorc <gregory.szorc@gmail.com>
# Date 1436903210 25200
#      Tue Jul 14 12:46:50 2015 -0700
# Node ID 3c74157a105dc99514775809a99d7ba0f950c614
# Parent  bcdf6cd26113b06a489b9a3051bcd20a1e44fc4b
revlog: use persistent file handles during addgroup

revlog.addgroup() calls into revlog.revision() whenever a new chain is created.
Previously, every time revlog.revision() requested chunks that weren't
cached, we would open a new file handle, seek, read data, and close the
file handle. For large repositories, this resulted in several extra
open()/close() system calls (thousands when processing changesets on a
repository with over 100,000 changesets).

By caching the read file handle during addgroup(), we go a long way
towards eliminating extra file open()/close() system calls as part of
applying changegroup data.

This change appears to show no statistically significant change in
performance on my MBP. Yet it does reduce the number of system calls
significantly. That doesn't mean it isn't a good idea, however.

Patch

diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -1489,63 +1489,66 @@  class revlog(object):
             if dfh:
                 dfh.flush()
             ifh.flush()
         try:
-            # loop through our set of deltas
-            chain = None
-            while True:
-                chunkdata = bundle.deltachunk(chain)
-                if not chunkdata:
-                    break
-                node = chunkdata['node']
-                p1 = chunkdata['p1']
-                p2 = chunkdata['p2']
-                cs = chunkdata['cs']
-                deltabase = chunkdata['deltabase']
-                delta = chunkdata['delta']
+            with self.persistentreadhandle() as refreshreadhandles:
+                # loop through our set of deltas
+                chain = None
+                while True:
+                    chunkdata = bundle.deltachunk(chain)
+                    if not chunkdata:
+                        break
+                    node = chunkdata['node']
+                    p1 = chunkdata['p1']
+                    p2 = chunkdata['p2']
+                    cs = chunkdata['cs']
+                    deltabase = chunkdata['deltabase']
+                    delta = chunkdata['delta']
 
-                content.append(node)
+                    content.append(node)
 
-                link = linkmapper(cs)
-                if node in self.nodemap:
-                    # this can happen if two branches make the same change
-                    chain = node
-                    continue
+                    link = linkmapper(cs)
+                    if node in self.nodemap:
+                        # this can happen if two branches make the same change
+                        chain = node
+                        continue
 
-                for p in (p1, p2):
-                    if p not in self.nodemap:
-                        raise LookupError(p, self.indexfile,
-                                          _('unknown parent'))
+                    for p in (p1, p2):
+                        if p not in self.nodemap:
+                            raise LookupError(p, self.indexfile,
+                                              _('unknown parent'))
 
-                if deltabase not in self.nodemap:
-                    raise LookupError(deltabase, self.indexfile,
-                                      _('unknown delta base'))
+                    if deltabase not in self.nodemap:
+                        raise LookupError(deltabase, self.indexfile,
+                                          _('unknown delta base'))
 
-                baserev = self.rev(deltabase)
+                    baserev = self.rev(deltabase)
 
-                if baserev != nullrev and self.iscensored(baserev):
-                    # if base is censored, delta must be full replacement in a
-                    # single patch operation
-                    hlen = struct.calcsize(">lll")
-                    oldlen = self.rawsize(baserev)
-                    newlen = len(delta) - hlen
-                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
-                        raise error.CensoredBaseError(self.indexfile,
-                                                      self.node(baserev))
+                    if baserev != nullrev and self.iscensored(baserev):
+                        # if base is censored, delta must be full replacement
+                        # in a single patch operation
+                        hlen = struct.calcsize(">lll")
+                        oldlen = self.rawsize(baserev)
+                        newlen = len(delta) - hlen
+                        if delta[:hlen] != mdiff.replacediffheader(oldlen,
+                                                                   newlen):
+                            raise error.CensoredBaseError(self.indexfile,
+                                                          self.node(baserev))
 
-                flags = REVIDX_DEFAULT_FLAGS
-                if self._peek_iscensored(baserev, delta, flush):
-                    flags |= REVIDX_ISCENSORED
+                    flags = REVIDX_DEFAULT_FLAGS
+                    if self._peek_iscensored(baserev, delta, flush):
+                        flags |= REVIDX_ISCENSORED
 
-                chain = self._addrevision(node, None, transaction, link,
-                                          p1, p2, flags, (baserev, delta),
-                                          ifh, dfh)
-                if not dfh and not self._inline:
-                    # addrevision switched from inline to conventional
-                    # reopen the index
-                    ifh.close()
-                    dfh = self.opener(self.datafile, "a")
-                    ifh = self.opener(self.indexfile, "a")
+                    chain = self._addrevision(node, None, transaction, link,
+                                              p1, p2, flags, (baserev, delta),
+                                              ifh, dfh)
+                    if not dfh and not self._inline:
+                        # addrevision switched from inline to conventional
+                        # reopen the index
+                        ifh.close()
+                        dfh = self.opener(self.datafile, "a")
+                        ifh = self.opener(self.indexfile, "a")
+                        refreshreadhandles()
         finally:
             if dfh:
                 dfh.close()
             ifh.close()