Submitter | timeless |
---|---|
Date | May 12, 2016, 1:23 a.m. |
Message ID | <0032690e7a7869470063.1463016195@gcc2-power8.osuosl.org> |
Download | mbox | patch |
Permalink | /patch/15055/ |
State | Changes Requested |
Headers | show |
Comments
On Thu, 12 May 2016 01:23:15 +0000, timeless wrote: > # HG changeset patch > # User timeless <timeless@mozdev.org> > # Date 1461083470 0 > # Tue Apr 19 16:31:10 2016 +0000 > # Node ID 0032690e7a78694700632fe1fed541ab2f2ff3b0 > # Parent d0b1768012385bb1ceacace2f6c32ee28a29f46e > # Available At bb://timeless/mercurial-crew > # hg pull bb://timeless/mercurial-crew -r 0032690e7a78 > bytearray: replace array.array with bytearray > > The array.array api changes significantly between py2 and py3. > OTOH, bytearray was backported to py2.6+, and thus is supported > everywhere Mercurial runs, and its interface is fairly consistent > between py2 and py3. > > Mercurial just wants an array that manages bytes, > so this fits the bill nicely. At first glance, this patch looks good to me. But as you know, bytearray() requires extra str() calls. So I'll have to review this patch more carefully. If nobody review this, I'll revisit it later.
On 12 May 2016 at 02:23, timeless <timeless@fmr.im> wrote: > # HG changeset patch > # User timeless <timeless@mozdev.org> > # Date 1461083470 0 > # Tue Apr 19 16:31:10 2016 +0000 > # Node ID 0032690e7a78694700632fe1fed541ab2f2ff3b0 > # Parent d0b1768012385bb1ceacace2f6c32ee28a29f46e > # Available At bb://timeless/mercurial-crew > # hg pull bb://timeless/mercurial-crew -r 0032690e7a78 > bytearray: replace array.array with bytearray > > The array.array api changes significantly between py2 and py3. > OTOH, bytearray was backported to py2.6+, and thus is supported > everywhere Mercurial runs, and its interface is fairly consistent > between py2 and py3. > > Mercurial just wants an array that manages bytes, > so this fits the bill nicely. > > diff -r d0b176801238 -r 0032690e7a78 mercurial/branchmap.py > --- a/mercurial/branchmap.py Wed May 11 23:27:40 2016 +0000 > +++ b/mercurial/branchmap.py Tue Apr 19 16:31:10 2016 +0000 > @@ -7,7 +7,6 @@ > > from __future__ import absolute_import > > -import array > import struct > import time > > @@ -23,7 +22,6 @@ > scmutil, > ) > > -array = array.array > calcsize = struct.calcsize > pack = struct.pack > unpack = struct.unpack > @@ -357,7 +355,7 @@ > assert repo.filtername is None > self._repo = repo > self._names = [] # branch names in local encoding with static index > - self._rbcrevs = array('c') # structs of type _rbcrecfmt > + self._rbcrevs = bytearray() # structs of type _rbcrecfmt > self._rbcsnameslen = 0 > try: > bndata = repo.vfs.read(_rbcnames) > @@ -371,7 +369,7 @@ > if self._names: > try: > data = repo.vfs.read(_rbcrevs) > - self._rbcrevs.fromstring(data) > + self._rbcrevs = bytearray(data) > except (IOError, OSError) as inst: > repo.ui.debug("couldn't read revision branch cache: %s\n" % > inst) > @@ -389,8 +387,7 @@ > self._rbcnamescount = 0 > self._namesreverse.clear() > self._rbcrevslen = len(self._repo.changelog) > - self._rbcrevs = array('c') > - self._rbcrevs.fromstring('\0' * (self._rbcrevslen * _rbcrecsize)) > + self._rbcrevs = bytearray('\0' * (self._rbcrevslen * _rbcrecsize)) Just pass in the integer result: self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize) This produces a bytearray of that size, filled with null bytes. > def branchinfo(self, rev): > """Return branch name and close flag for rev, using and updating > @@ -450,8 +447,7 @@ > def _setcachedata(self, rev, node, branchidx): > """Writes the node's branch data to the in-memory cache data.""" > rbcrevidx = rev * _rbcrecsize > - rec = array('c') > - rec.fromstring(pack(_rbcrecfmt, node, branchidx)) > + rec = bytearray(pack(_rbcrecfmt, node, branchidx)) > self._rbcrevs[rbcrevidx:rbcrevidx + _rbcrecsize] = rec > self._rbcrevslen = min(self._rbcrevslen, rev) > > diff -r d0b176801238 -r 0032690e7a78 mercurial/exchange.py > --- a/mercurial/exchange.py Wed May 11 23:27:40 2016 +0000 > +++ b/mercurial/exchange.py Tue Apr 19 16:31:10 2016 +0000 > @@ -1638,7 +1638,7 @@ > chunks.extend([node, fnode]) > > if chunks: > - bundler.newpart('hgtagsfnodes', data=''.join(chunks)) > + bundler.newpart('hgtagsfnodes', data=''.join([str(a) for a in chunks])) Would starting chunks as a bytearray, then extending *that* instead of a list, not work better? That would avoid creating a series of intermediate strings here. chunks = bytearray() for node in outgoing.missingheads: # Don't compute missing, as this may slow down serving. fnode = cache.getfnode(node, computemissing=False) if fnode is not None: chunks += node chunks += fnode Now you have just one bytestring object to convert to str. > def check_heads(repo, their_heads, context): > """check if the heads of a repo have been modified > diff -r d0b176801238 -r 0032690e7a78 mercurial/manifest.py > --- a/mercurial/manifest.py Wed May 11 23:27:40 2016 +0000 > +++ b/mercurial/manifest.py Tue Apr 19 16:31:10 2016 +0000 > @@ -7,7 +7,6 @@ > > from __future__ import absolute_import > > -import array > import heapq > import os > import struct > @@ -338,7 +337,7 @@ > return self._lm.text() > > def fastdelta(self, base, changes): > - """Given a base manifest text as an array.array and a list of changes > + """Given a base manifest text as a bytearray and a list of changes > relative to that text, compute a delta that can be used by revlog. > """ > delta = [] > @@ -384,8 +383,8 @@ > else: > # For large changes, it's much cheaper to just build the text and > # diff it. > - arraytext = array.array('c', self.text()) > - deltatext = mdiff.textdiff(base, arraytext) > + arraytext = bytearray(self.text()) > + deltatext = mdiff.textdiff(str(base), str(arraytext)) > > return arraytext, deltatext If self.text() is a string already, why convert to a bytearray then back to str? Better use text = self.text() arraytext = bytearray(text) deltatext = mdiff.textdiff(str(base), text) > @@ -443,12 +442,12 @@ > # for large addlist arrays, building a new array is cheaper > # than repeatedly modifying the existing one > currentposition = 0 > - newaddlist = array.array('c') > + newaddlist = bytearray() > > for start, end, content in x: > newaddlist += addlist[currentposition:start] > if content: > - newaddlist += array.array('c', content) > + newaddlist += bytearray(content) > > currentposition = end > > @@ -1015,7 +1014,7 @@ > else: > text = self.revision(node) > m = self._newmanifest(text) > - arraytext = array.array('c', text) > + arraytext = bytearray(text) > self._mancache[node] = (m, arraytext) > return m > > @@ -1065,7 +1064,7 @@ > else: > text = m.text(self._usemanifestv2) > n = self.addrevision(text, transaction, link, p1, p2) > - arraytext = array.array('c', text) > + arraytext = bytearray(text) > > self._mancache[n] = (m, arraytext) > > diff -r d0b176801238 -r 0032690e7a78 mercurial/revlog.py > --- a/mercurial/revlog.py Wed May 11 23:27:40 2016 +0000 > +++ b/mercurial/revlog.py Tue Apr 19 16:31:10 2016 +0000 > @@ -352,7 +352,7 @@ > > def rev(self, node): > try: > - return self._nodecache[node] > + return self._nodecache[str(node)] > except TypeError: > raise > except RevlogError: > diff -r d0b176801238 -r 0032690e7a78 mercurial/tags.py > --- a/mercurial/tags.py Wed May 11 23:27:40 2016 +0000 > +++ b/mercurial/tags.py Tue Apr 19 16:31:10 2016 +0000 > @@ -12,7 +12,6 @@ > > from __future__ import absolute_import > > -import array > import errno > import time > > @@ -28,8 +27,6 @@ > util, > ) > > -array = array.array > - > # Tags computation can be expensive and caches exist to make it fast in > # the common case. > # > @@ -113,8 +110,8 @@ > "tag cache returned bogus head %s" % short(head) > > fnode = tagfnode.get(head) > - if fnode and fnode not in seen: > - seen.add(fnode) > + if fnode and str(fnode) not in seen: > + seen.add(str(fnode)) > if not fctx: > fctx = repo.filectx('.hgtags', fileid=fnode) > else: > @@ -430,13 +427,12 @@ > self.lookupcount = 0 > self.hitcount = 0 > > - self._raw = array('c') > - > try: > data = repo.vfs.read(_fnodescachefile) > except (OSError, IOError): > data = "" > - self._raw.fromstring(data) > + data = repo.vfs.tryread(_fnodescachefile) > + self._raw = bytearray(data) data is set twice now; you didn't remove the try..except. > # The end state of self._raw is an array that is of the exact length > # required to hold a record for every revision in the repository. > @@ -477,7 +473,7 @@ > self.lookupcount += 1 > > offset = rev * _fnodesrecsize > - record = self._raw[offset:offset + _fnodesrecsize].tostring() > + record = self._raw[offset:offset + _fnodesrecsize] > properprefix = node[0:4] > > # Validate and return existing entry. > @@ -518,7 +514,7 @@ > > def _writeentry(self, offset, prefix, fnode): > # Slices on array instances only accept other array. > - entry = array('c', prefix + fnode) > + entry = bytearray(prefix + fnode) > self._raw[offset:offset + _fnodesrecsize] = entry > # self._dirtyoffset could be None. > self._dirtyoffset = min(self._dirtyoffset, offset) or 0 > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
On Wed, May 18, 2016 at 12:08 PM, Martijn Pieters <mj@zopatista.com> wrote: > Just pass in the integer result: > self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize) > This produces a bytearray of that size, filled with null bytes. will do > Would starting chunks as a bytearray, then extending *that* instead of > a list, not work better? That would avoid creating a series of > intermediate strings here. > > chunks = bytearray() > > for node in outgoing.missingheads: > # Don't compute missing, as this may slow down serving. > fnode = cache.getfnode(node, computemissing=False) > if fnode is not None: > chunks += node > chunks += fnode > > Now you have just one bytestring object to convert to str. It definitely works. I'll include it with a v2. >> - deltatext = mdiff.textdiff(base, arraytext) >> + arraytext = bytearray(self.text()) >> + deltatext = mdiff.textdiff(str(base), str(arraytext)) >> >> return arraytext, deltatext > > If self.text() is a string already, why convert to a bytearray then > back to str? I'll add a comment. The short answer is that mdiff.textdiff (~ bdiff) currently takes a string, but it shouldn't. It should take a bytearray. The goal of this commit is to get rid of array.array. In a separate commit, I'll convert bdiff to accept bytearrays. >> @@ -430,13 +427,12 @@ >> try: >> data = repo.vfs.read(_fnodescachefile) >> except (OSError, IOError): >> data = "" >> - self._raw.fromstring(data) >> + data = repo.vfs.tryread(_fnodescachefile) >> + self._raw = bytearray(data) > > data is set twice now; you didn't remove the try..except. Oops. That was a mis-merge. I didn't check the history when I did the merge. I'll remove the first insertion.
Patch
diff -r d0b176801238 -r 0032690e7a78 mercurial/branchmap.py --- a/mercurial/branchmap.py Wed May 11 23:27:40 2016 +0000 +++ b/mercurial/branchmap.py Tue Apr 19 16:31:10 2016 +0000 @@ -7,7 +7,6 @@ from __future__ import absolute_import -import array import struct import time @@ -23,7 +22,6 @@ scmutil, ) -array = array.array calcsize = struct.calcsize pack = struct.pack unpack = struct.unpack @@ -357,7 +355,7 @@ assert repo.filtername is None self._repo = repo self._names = [] # branch names in local encoding with static index - self._rbcrevs = array('c') # structs of type _rbcrecfmt + self._rbcrevs = bytearray() # structs of type _rbcrecfmt self._rbcsnameslen = 0 try: bndata = repo.vfs.read(_rbcnames) @@ -371,7 +369,7 @@ if self._names: try: data = repo.vfs.read(_rbcrevs) - self._rbcrevs.fromstring(data) + self._rbcrevs = bytearray(data) except (IOError, OSError) as inst: repo.ui.debug("couldn't read revision branch cache: %s\n" % inst) @@ -389,8 +387,7 @@ self._rbcnamescount = 0 self._namesreverse.clear() self._rbcrevslen = len(self._repo.changelog) - self._rbcrevs = array('c') - self._rbcrevs.fromstring('\0' * (self._rbcrevslen * _rbcrecsize)) + self._rbcrevs = bytearray('\0' * (self._rbcrevslen * _rbcrecsize)) def branchinfo(self, rev): """Return branch name and close flag for rev, using and updating @@ -450,8 +447,7 @@ def _setcachedata(self, rev, node, branchidx): """Writes the node's branch data to the in-memory cache data.""" rbcrevidx = rev * _rbcrecsize - rec = array('c') - rec.fromstring(pack(_rbcrecfmt, node, branchidx)) + rec = bytearray(pack(_rbcrecfmt, node, branchidx)) self._rbcrevs[rbcrevidx:rbcrevidx + _rbcrecsize] = rec self._rbcrevslen = min(self._rbcrevslen, rev) diff -r d0b176801238 -r 0032690e7a78 mercurial/exchange.py --- a/mercurial/exchange.py Wed May 11 23:27:40 2016 +0000 +++ b/mercurial/exchange.py Tue Apr 19 16:31:10 2016 +0000 @@ -1638,7 +1638,7 @@ chunks.extend([node, fnode]) if chunks: - bundler.newpart('hgtagsfnodes', data=''.join(chunks)) + bundler.newpart('hgtagsfnodes', data=''.join([str(a) for a in chunks])) def check_heads(repo, their_heads, context): """check if the heads of a repo have been modified diff -r d0b176801238 -r 0032690e7a78 mercurial/manifest.py --- a/mercurial/manifest.py Wed May 11 23:27:40 2016 +0000 +++ b/mercurial/manifest.py Tue Apr 19 16:31:10 2016 +0000 @@ -7,7 +7,6 @@ from __future__ import absolute_import -import array import heapq import os import struct @@ -338,7 +337,7 @@ return self._lm.text() def fastdelta(self, base, changes): - """Given a base manifest text as an array.array and a list of changes + """Given a base manifest text as a bytearray and a list of changes relative to that text, compute a delta that can be used by revlog. """ delta = [] @@ -384,8 +383,8 @@ else: # For large changes, it's much cheaper to just build the text and # diff it. - arraytext = array.array('c', self.text()) - deltatext = mdiff.textdiff(base, arraytext) + arraytext = bytearray(self.text()) + deltatext = mdiff.textdiff(str(base), str(arraytext)) return arraytext, deltatext @@ -443,12 +442,12 @@ # for large addlist arrays, building a new array is cheaper # than repeatedly modifying the existing one currentposition = 0 - newaddlist = array.array('c') + newaddlist = bytearray() for start, end, content in x: newaddlist += addlist[currentposition:start] if content: - newaddlist += array.array('c', content) + newaddlist += bytearray(content) currentposition = end @@ -1015,7 +1014,7 @@ else: text = self.revision(node) m = self._newmanifest(text) - arraytext = array.array('c', text) + arraytext = bytearray(text) self._mancache[node] = (m, arraytext) return m @@ -1065,7 +1064,7 @@ else: text = m.text(self._usemanifestv2) n = self.addrevision(text, transaction, link, p1, p2) - arraytext = array.array('c', text) + arraytext = bytearray(text) self._mancache[n] = (m, arraytext) diff -r d0b176801238 -r 0032690e7a78 mercurial/revlog.py --- a/mercurial/revlog.py Wed May 11 23:27:40 2016 +0000 +++ b/mercurial/revlog.py Tue Apr 19 16:31:10 2016 +0000 @@ -352,7 +352,7 @@ def rev(self, node): try: - return self._nodecache[node] + return self._nodecache[str(node)] except TypeError: raise except RevlogError: diff -r d0b176801238 -r 0032690e7a78 mercurial/tags.py --- a/mercurial/tags.py Wed May 11 23:27:40 2016 +0000 +++ b/mercurial/tags.py Tue Apr 19 16:31:10 2016 +0000 @@ -12,7 +12,6 @@ from __future__ import absolute_import -import array import errno import time @@ -28,8 +27,6 @@ util, ) -array = array.array - # Tags computation can be expensive and caches exist to make it fast in # the common case. # @@ -113,8 +110,8 @@ "tag cache returned bogus head %s" % short(head) fnode = tagfnode.get(head) - if fnode and fnode not in seen: - seen.add(fnode) + if fnode and str(fnode) not in seen: + seen.add(str(fnode)) if not fctx: fctx = repo.filectx('.hgtags', fileid=fnode) else: @@ -430,13 +427,12 @@ self.lookupcount = 0 self.hitcount = 0 - self._raw = array('c') - try: data = repo.vfs.read(_fnodescachefile) except (OSError, IOError): data = "" - self._raw.fromstring(data) + data = repo.vfs.tryread(_fnodescachefile) + self._raw = bytearray(data) # The end state of self._raw is an array that is of the exact length # required to hold a record for every revision in the repository. @@ -477,7 +473,7 @@ self.lookupcount += 1 offset = rev * _fnodesrecsize - record = self._raw[offset:offset + _fnodesrecsize].tostring() + record = self._raw[offset:offset + _fnodesrecsize] properprefix = node[0:4] # Validate and return existing entry. @@ -518,7 +514,7 @@ def _writeentry(self, offset, prefix, fnode): # Slices on array instances only accept other array. - entry = array('c', prefix + fnode) + entry = bytearray(prefix + fnode) self._raw[offset:offset + _fnodesrecsize] = entry # self._dirtyoffset could be None. self._dirtyoffset = min(self._dirtyoffset, offset) or 0