Patchwork [1,of,3] keepalive: don't concatenate strings when reading chunked transfer

login
register
mail settings
Submitter Gregory Szorc
Date Dec. 26, 2016, 6:53 p.m.
Message ID <a1601f6c4e504f9e9ee8.1482778405@gps-mbp.local>
Download mbox | patch
Permalink /patch/18041/
State Accepted
Headers show

Comments

Gregory Szorc - Dec. 26, 2016, 6:53 p.m.
# HG changeset patch
# User Gregory Szorc <gregory.szorc@gmail.com>
# Date 1444257232 25200
#      Wed Oct 07 15:33:52 2015 -0700
# Node ID a1601f6c4e504f9e9ee8174f9390398b622ef219
# Parent  0064a1eb28e246ded9b726c696d048143d1b23f1
keepalive: don't concatenate strings when reading chunked transfer

Surprisingly, this didn't appear to speed up HTTP-based stream cloning
on my machine. I suspect this has more to do with the fact we're using
small HTTP chunks and string concatenation overhead isn't so bad.
However, the reasons for this change are solid: we know string
concatenation can be a performance sink.

Patch

diff --git a/mercurial/keepalive.py b/mercurial/keepalive.py
--- a/mercurial/keepalive.py
+++ b/mercurial/keepalive.py
@@ -398,12 +398,10 @@  class HTTPResponse(httplib.HTTPResponse)
 
     # stolen from Python SVN #68532 to fix issue1088
     def _read_chunked(self, amt):
         chunk_left = self.chunk_left
-        value = ''
+        parts = []
 
-        # XXX This accumulates chunks by repeated string concatenation,
-        # which is not efficient as the number or size of chunks gets big.
         while True:
             if chunk_left is None:
                 line = self.fp.readline()
                 i = line.find(';')
@@ -414,24 +412,24 @@  class HTTPResponse(httplib.HTTPResponse)
                 except ValueError:
                     # close the connection as protocol synchronization is
                     # probably lost
                     self.close()
-                    raise httplib.IncompleteRead(value)
+                    raise httplib.IncompleteRead(''.join(parts))
                 if chunk_left == 0:
                     break
             if amt is None:
-                value += self._safe_read(chunk_left)
+                parts.append(self._safe_read(chunk_left))
             elif amt < chunk_left:
-                value += self._safe_read(amt)
+                parts.append(self._safe_read(amt))
                 self.chunk_left = chunk_left - amt
-                return value
+                return ''.join(parts)
             elif amt == chunk_left:
-                value += self._safe_read(amt)
+                parts.append(self._safe_read(amt))
                 self._safe_read(2)  # toss the CRLF at the end of the chunk
                 self.chunk_left = None
-                return value
+                return ''.join(parts)
             else:
-                value += self._safe_read(chunk_left)
+                parts.append(self._safe_read(chunk_left))
                 amt -= chunk_left
 
             # we read the whole chunk, get another
             self._safe_read(2)      # toss the CRLF at the end of the chunk
@@ -450,9 +448,9 @@  class HTTPResponse(httplib.HTTPResponse)
 
         # we read everything; close the "file"
         self.close()
 
-        return value
+        return ''.join(parts)
 
     def readline(self, limit=-1):
         i = self._rbuf.find('\n')
         while i < 0 and not (0 < limit <= len(self._rbuf)):