Submitter | Augie Fackler |
---|---|
Date | May 12, 2016, 1:50 p.m. |
Message ID | <f892953654ff55a96e5c.1463061007@arthedain.pit.corp.google.com> |
Download | mbox | patch |
Permalink | /patch/15077/ |
State | Accepted |
Headers | show |
Comments
On Thu, May 12, 2016 at 6:50 AM, Augie Fackler <raf@durin42.com> wrote: > # HG changeset patch > # User Augie Fackler <augie@google.com> > # Date 1463060354 14400 > # Thu May 12 09:39:14 2016 -0400 > # Node ID f892953654ff55a96e5c80fab5a993f823c508de > # Parent 0e9ed09f5fe9c390e60a98426f64bc3a231f7aa0 > wireproto: optimize handling of large batch responses > > Now that batch can be used by remotefilelog, the quadratic string > copying this was doing was actually disastrous. In my local testing, > fetching a 56 meg file used to take 3 minutes, and now takes only a > few seconds. Nice! > > diff --git a/mercurial/wireproto.py b/mercurial/wireproto.py > --- a/mercurial/wireproto.py > +++ b/mercurial/wireproto.py > @@ -231,17 +231,22 @@ class wirepeer(peer.peerrepository): > for k, v in argsdict.iteritems()) > cmds.append('%s %s' % (op, args)) > rsp = self._callstream("batch", cmds=';'.join(cmds)) > - # TODO this response parsing is probably suboptimal for large > - # batches with large responses. > - work = rsp.read(1024) > - chunk = work > + chunk = rsp.read(1024) > + work = [chunk] > + while ';' not in chunk and chunk: > + chunk = rsp.read(1024) > + work.append(chunk) Nit: These 3 lines look like the last 3 lines of the loop below. Can those 3 be moved to the top of the loop body and these 3 (above) be deleted? > while chunk: > - while ';' in work: > - one, work = work.split(';', 1) > + merged = ''.join(work) > + while ';' in merged: > + one, merged = merged.split(';', 1) > yield unescapearg(one) > chunk = rsp.read(1024) > - work += chunk > - yield unescapearg(work) > + work = [merged, chunk] > + while ';' not in chunk and chunk: > + chunk = rsp.read(1024) > + work.append(chunk) > + yield unescapearg(''.join(work)) > > def _submitone(self, op, args): > return self._call(op, **args) > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
> On May 12, 2016, at 12:18, Martin von Zweigbergk <martinvonz@google.com> wrote: > > On Thu, May 12, 2016 at 6:50 AM, Augie Fackler <raf@durin42.com> wrote: >> # HG changeset patch >> # User Augie Fackler <augie@google.com> >> # Date 1463060354 14400 >> # Thu May 12 09:39:14 2016 -0400 >> # Node ID f892953654ff55a96e5c80fab5a993f823c508de >> # Parent 0e9ed09f5fe9c390e60a98426f64bc3a231f7aa0 >> wireproto: optimize handling of large batch responses >> >> Now that batch can be used by remotefilelog, the quadratic string >> copying this was doing was actually disastrous. In my local testing, >> fetching a 56 meg file used to take 3 minutes, and now takes only a >> few seconds. > > Nice! > >> >> diff --git a/mercurial/wireproto.py b/mercurial/wireproto.py >> --- a/mercurial/wireproto.py >> +++ b/mercurial/wireproto.py >> @@ -231,17 +231,22 @@ class wirepeer(peer.peerrepository): >> for k, v in argsdict.iteritems()) >> cmds.append('%s %s' % (op, args)) >> rsp = self._callstream("batch", cmds=';'.join(cmds)) >> - # TODO this response parsing is probably suboptimal for large >> - # batches with large responses. >> - work = rsp.read(1024) >> - chunk = work >> + chunk = rsp.read(1024) >> + work = [chunk] >> + while ';' not in chunk and chunk: >> + chunk = rsp.read(1024) >> + work.append(chunk) > > Nit: These 3 lines look like the last 3 lines of the loop below. Can > those 3 be moved to the top of the loop body and these 3 (above) be > deleted? Yeah, no reason those can't move. Per irc I won't do a resend. Thanks for catching that! > >> while chunk: >> - while ';' in work: >> - one, work = work.split(';', 1) >> + merged = ''.join(work) >> + while ';' in merged: >> + one, merged = merged.split(';', 1) >> yield unescapearg(one) >> chunk = rsp.read(1024) >> - work += chunk >> - yield unescapearg(work) >> + work = [merged, chunk] >> + while ';' not in chunk and chunk: >> + chunk = rsp.read(1024) >> + work.append(chunk) >> + yield unescapearg(''.join(work)) >> >> def _submitone(self, op, args): >> return self._call(op, **args) >> _______________________________________________ >> Mercurial-devel mailing list >> Mercurial-devel@mercurial-scm.org >> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Pushed to "committed". Thanks! On Thu, May 12, 2016 at 10:22 AM, Augie Fackler <raf@durin42.com> wrote: > >> On May 12, 2016, at 12:18, Martin von Zweigbergk <martinvonz@google.com> wrote: >> >> On Thu, May 12, 2016 at 6:50 AM, Augie Fackler <raf@durin42.com> wrote: >>> # HG changeset patch >>> # User Augie Fackler <augie@google.com> >>> # Date 1463060354 14400 >>> # Thu May 12 09:39:14 2016 -0400 >>> # Node ID f892953654ff55a96e5c80fab5a993f823c508de >>> # Parent 0e9ed09f5fe9c390e60a98426f64bc3a231f7aa0 >>> wireproto: optimize handling of large batch responses >>> >>> Now that batch can be used by remotefilelog, the quadratic string >>> copying this was doing was actually disastrous. In my local testing, >>> fetching a 56 meg file used to take 3 minutes, and now takes only a >>> few seconds. >> >> Nice! >> >>> >>> diff --git a/mercurial/wireproto.py b/mercurial/wireproto.py >>> --- a/mercurial/wireproto.py >>> +++ b/mercurial/wireproto.py >>> @@ -231,17 +231,22 @@ class wirepeer(peer.peerrepository): >>> for k, v in argsdict.iteritems()) >>> cmds.append('%s %s' % (op, args)) >>> rsp = self._callstream("batch", cmds=';'.join(cmds)) >>> - # TODO this response parsing is probably suboptimal for large >>> - # batches with large responses. >>> - work = rsp.read(1024) >>> - chunk = work >>> + chunk = rsp.read(1024) >>> + work = [chunk] >>> + while ';' not in chunk and chunk: >>> + chunk = rsp.read(1024) >>> + work.append(chunk) >> >> Nit: These 3 lines look like the last 3 lines of the loop below. Can >> those 3 be moved to the top of the loop body and these 3 (above) be >> deleted? > > Yeah, no reason those can't move. Per irc I won't do a resend. Thanks for catching that! > >> >>> while chunk: >>> - while ';' in work: >>> - one, work = work.split(';', 1) >>> + merged = ''.join(work) >>> + while ';' in merged: >>> + one, merged = merged.split(';', 1) >>> yield unescapearg(one) >>> chunk = rsp.read(1024) >>> - work += chunk >>> - yield unescapearg(work) >>> + work = [merged, chunk] >>> + while ';' not in chunk and chunk: >>> + chunk = rsp.read(1024) >>> + work.append(chunk) >>> + yield unescapearg(''.join(work)) >>> >>> def _submitone(self, op, args): >>> return self._call(op, **args) >>> _______________________________________________ >>> Mercurial-devel mailing list >>> Mercurial-devel@mercurial-scm.org >>> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel >
Patch
diff --git a/mercurial/wireproto.py b/mercurial/wireproto.py --- a/mercurial/wireproto.py +++ b/mercurial/wireproto.py @@ -231,17 +231,22 @@ class wirepeer(peer.peerrepository): for k, v in argsdict.iteritems()) cmds.append('%s %s' % (op, args)) rsp = self._callstream("batch", cmds=';'.join(cmds)) - # TODO this response parsing is probably suboptimal for large - # batches with large responses. - work = rsp.read(1024) - chunk = work + chunk = rsp.read(1024) + work = [chunk] + while ';' not in chunk and chunk: + chunk = rsp.read(1024) + work.append(chunk) while chunk: - while ';' in work: - one, work = work.split(';', 1) + merged = ''.join(work) + while ';' in merged: + one, merged = merged.split(';', 1) yield unescapearg(one) chunk = rsp.read(1024) - work += chunk - yield unescapearg(work) + work = [merged, chunk] + while ';' not in chunk and chunk: + chunk = rsp.read(1024) + work.append(chunk) + yield unescapearg(''.join(work)) def _submitone(self, op, args): return self._call(op, **args)