Submitter | David Schleimer |
---|---|
Date | May 29, 2014, 5:43 p.m. |
Message ID | <6d2cebe560482e230509.1401385397@devbig100.prn2.facebook.com> |
Download | mbox | patch |
Permalink | /patch/4895/ |
State | Accepted |
Commit | 90e3fcd9e6e4e68d7cedb841ea921d6622db29ce |
Headers | show |
Comments
On May 29, 2014, at 1:43 PM, David Schleimer <dschleimer@fb.com> wrote: > # HG changeset patch > # User David Schleimer <dschleimer@fb.com> > # Date 1401250344 25200 > # Tue May 27 21:12:24 2014 -0700 > # Node ID 6d2cebe560482e230509a0c4ba468d8e5bb50ddd > # Parent 652e07debf10193f4973a48ead96a95e81d0a55b > convert: drastically speed up git conversions Queued enthusiastically. Thanks a ton! (Used git log --grep 'cat-file --batch' and gave up when I was back in 2009 and the flag was still mentioned, so I'm thinking this is pretty safe.) > > We would formerly exec git cat-file once for every commit, plus once for > every tree and file we wanted to read. This switches to using git > cat-file's batch mode, which is much, much, much faster. > > Using this new code, converting the git git repo to hg ran in 106 > minutes on my machine. Using the stock mercurial, it required 1239 > minutes. I believe this to be typical of the speedups we will see > from this patch. > > diff --git a/hgext/convert/git.py b/hgext/convert/git.py > --- a/hgext/convert/git.py > +++ b/hgext/convert/git.py > @@ -46,6 +46,18 @@ > del os.environ['GIT_DIR'] > else: > os.environ['GIT_DIR'] = prevgitdir > + > + def gitpipe(self, s): > + prevgitdir = os.environ.get('GIT_DIR') > + os.environ['GIT_DIR'] = self.path > + try: > + return util.popen3(s) > + finally: > + if prevgitdir is None: > + del os.environ['GIT_DIR'] > + else: > + os.environ['GIT_DIR'] = prevgitdir > + > else: > def gitopen(self, s, err=None): > if err == subprocess.PIPE: > @@ -56,6 +68,9 @@ > else: > return util.popen('GIT_DIR=%s %s' % (self.path, s), 'rb') > > + def gitpipe(self, s): > + return util.popen3('GIT_DIR=%s %s' % (self.path, s)) > + > def popen_with_stderr(self, s): > p = subprocess.Popen(s, shell=True, bufsize=-1, > close_fds=util.closefds, > @@ -84,6 +99,12 @@ > self.path = path > self.submodules = [] > > + self.catfilepipe = self.gitpipe('git cat-file --batch') > + > + def after(self): > + for f in self.catfilepipe: > + 
f.close() > + > def getheads(self): > if not self.rev: > heads, ret = self.gitread('git rev-parse --branches --remotes') > @@ -98,9 +119,17 @@ > def catfile(self, rev, type): > if rev == hex(nullid): > raise IOError > - data, ret = self.gitread("git cat-file %s %s" % (type, rev)) > - if ret: > + self.catfilepipe[0].write(rev+'\n') > + self.catfilepipe[0].flush() > + info = self.catfilepipe[1].readline().split() > + if info[1] != type: > raise util.Abort(_('cannot read %r object at %s') % (type, rev)) > + size = int(info[2]) > + data = self.catfilepipe[1].read(size) > + if len(data) < size: > + raise util.Abort(_('cannot read %r object at %s: %s') % (type, rev)) > + # read the trailing newline > + self.catfilepipe[1].read(1) > return data > > def getfile(self, name, rev): > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@selenic.com > http://selenic.com/mailman/listinfo/mercurial-devel
Patch
diff --git a/hgext/convert/git.py b/hgext/convert/git.py --- a/hgext/convert/git.py +++ b/hgext/convert/git.py @@ -46,6 +46,18 @@ del os.environ['GIT_DIR'] else: os.environ['GIT_DIR'] = prevgitdir + + def gitpipe(self, s): + prevgitdir = os.environ.get('GIT_DIR') + os.environ['GIT_DIR'] = self.path + try: + return util.popen3(s) + finally: + if prevgitdir is None: + del os.environ['GIT_DIR'] + else: + os.environ['GIT_DIR'] = prevgitdir + else: def gitopen(self, s, err=None): if err == subprocess.PIPE: @@ -56,6 +68,9 @@ else: return util.popen('GIT_DIR=%s %s' % (self.path, s), 'rb') + def gitpipe(self, s): + return util.popen3('GIT_DIR=%s %s' % (self.path, s)) + def popen_with_stderr(self, s): p = subprocess.Popen(s, shell=True, bufsize=-1, close_fds=util.closefds, @@ -84,6 +99,12 @@ self.path = path self.submodules = [] + self.catfilepipe = self.gitpipe('git cat-file --batch') + + def after(self): + for f in self.catfilepipe: + f.close() + def getheads(self): if not self.rev: heads, ret = self.gitread('git rev-parse --branches --remotes') @@ -98,9 +119,17 @@ def catfile(self, rev, type): if rev == hex(nullid): raise IOError - data, ret = self.gitread("git cat-file %s %s" % (type, rev)) - if ret: + self.catfilepipe[0].write(rev+'\n') + self.catfilepipe[0].flush() + info = self.catfilepipe[1].readline().split() + if info[1] != type: raise util.Abort(_('cannot read %r object at %s') % (type, rev)) + size = int(info[2]) + data = self.catfilepipe[1].read(size) + if len(data) < size: + raise util.Abort(_('cannot read %r object at %s: %s') % (type, rev)) + # read the trailing newline + self.catfilepipe[1].read(1) return data def getfile(self, name, rev):