Submitter | Stanislau Hlebik |
---|---|
Date | Dec. 9, 2016, 11:16 a.m. |
Message ID | <001ceadc2bc36699bdf8.1481282161@devvm957.lla2.facebook.com> |
Download | mbox | patch |
Permalink | /patch/17881/ |
State | Changes Requested |
Headers | show |
Comments
On 12/09/2016 12:16 PM, Stanislau Hlebik wrote: > # HG changeset patch > # User Stanislau Hlebik <stash@fb.com> > # Date 1481281951 28800 > # Fri Dec 09 03:12:31 2016 -0800 > # Node ID 001ceadc2bc36699bdf816370899a27203bf1818 > # Parent 9e29d4e4e08b5996adda49cdd0b497d89e2b16ee > bookmarks: introduce binary encoding > > Bookmarks binary encoding will be used for `bookmarks` bundle2 part. > The format is: <4 bytes - bookmark size, big-endian><bookmark> > <1 byte - 0 if node is empty 1 otherwise><20 bytes node> > ValueError maybe thrown if input is incorrect. > > diff --git a/mercurial/bookmarks.py b/mercurial/bookmarks.py > --- a/mercurial/bookmarks.py > +++ b/mercurial/bookmarks.py > @@ -8,7 +8,9 @@ > from __future__ import absolute_import > > import errno > +import io > import os > +import struct > > from .i18n import _ > from .node import ( > @@ -23,6 +25,72 @@ > util, > ) > > +_NONEMPTYNODE = struct.pack('?', False) > +_EMPTYNODE = struct.pack('?', True) Our constant are still lower case ;-) > +def _unpackbookmarksize(stream): > + """Returns 0 if stream is empty. > + """ > + > + intstruct = struct.Struct('>i') > + expectedsize = intstruct.size > + encodedbookmarksize = stream.read(expectedsize) > + if not encodedbookmarksize: > + return 0 [small nits] What does this "stream" empty case means? If this is an error we should probably just error. If this is an end condition, we could make that more explicit by returning None. > + if len(encodedbookmarksize) != expectedsize: > + raise ValueError( > + _('cannot decode bookmark size: ' > + 'expected size: %d, actual size: %d') % > + (expectedsize, len(encodedbookmarksize))) Check "changegroup.readexactly" it does this check for you. > + return intstruct.unpack(encodedbookmarksize)[0] > + > +def encodebookmarks(bookmarks): > + """Encodes bookmark to node mapping to the byte string. > + > + Format: <4 bytes - bookmark size><bookmark> > + <1 byte - 0 if node is empty 1 otherwise><20 bytes node> > + Node may be 20 byte binary string or empty > + """ > + > + intstruct = struct.Struct('>i') > + for bookmark, node in sorted(bookmarks.iteritems()): > + encodedbookmark = encoding.fromlocal(bookmark) > + yield intstruct.pack(len(encodedbookmark)) > + yield encodedbookmark > + if node: > + if len(node) != 20: > + raise ValueError(_('node must be 20 or bytes long')) Is there case where the content of the node can be wrong ? if not, I would probably just use an assert. > + yield _NONEMPTYNODE > + yield node > + else: > + yield _EMPTYNODE > + > +def decodebookmarks(buf): > + """Decodes buffer into bookmark to node mapping. > + > + Node is either 20 bytes or empty. > + """ > + > + stream = io.BytesIO(buf) > + bookmarks = {} > + bookmarksize = _unpackbookmarksize(stream) > + boolstruct = struct.Struct('?') > + while bookmarksize: > + bookmark = stream.read(bookmarksize) > + if len(bookmark) != bookmarksize: > + raise ValueError( > + _('cannot decode bookmark: expected size: %d, ' > + 'actual size: %d') % (bookmarksize, len(bookmark))) CF previous comment about changegroup.readexactly. > + bookmark = encoding.tolocal(bookmark) > + packedemptynodeflag = stream.read(boolstruct.size) > + emptynode = boolstruct.unpack(packedemptynodeflag)[0] > + node = '' > + if not emptynode: > + node = stream.read(20) lalala, changegroup.readexactly. > + bookmarks[bookmark] = node > + bookmarksize = _unpackbookmarksize(stream) > + return bookmarks > + > def _getbkfile(repo): > """Hook so that extensions that mess with the store can hook bm storage. Cheers,
Excerpts from Pierre-Yves David's message of 2016-12-17 08:39:18 +0100: > > On 12/09/2016 12:16 PM, Stanislau Hlebik wrote: > > # HG changeset patch > > # User Stanislau Hlebik <stash@fb.com> > > # Date 1481281951 28800 > > # Fri Dec 09 03:12:31 2016 -0800 > > # Node ID 001ceadc2bc36699bdf816370899a27203bf1818 > > # Parent 9e29d4e4e08b5996adda49cdd0b497d89e2b16ee > > bookmarks: introduce binary encoding > > > > Bookmarks binary encoding will be used for `bookmarks` bundle2 part. > > The format is: <4 bytes - bookmark size, big-endian><bookmark> > > <1 byte - 0 if node is empty 1 otherwise><20 bytes node> > > ValueError maybe thrown if input is incorrect. > > > > diff --git a/mercurial/bookmarks.py b/mercurial/bookmarks.py > > --- a/mercurial/bookmarks.py > > +++ b/mercurial/bookmarks.py > > @@ -8,7 +8,9 @@ > > from __future__ import absolute_import > > > > import errno > > +import io > > import os > > +import struct > > > > from .i18n import _ > > from .node import ( > > @@ -23,6 +25,72 @@ > > util, > > ) > > > > +_NONEMPTYNODE = struct.pack('?', False) > > +_EMPTYNODE = struct.pack('?', True) > > Our constant are still lower case ;-) > > > +def _unpackbookmarksize(stream): > > + """Returns 0 if stream is empty. > > + """ > > + > > + intstruct = struct.Struct('>i') > > + expectedsize = intstruct.size > > + encodedbookmarksize = stream.read(expectedsize) > > + if not encodedbookmarksize: > > + return 0 > > [small nits] > > What does this "stream" empty case means? > > If this is an error we should probably just error. > > If this is an end condition, we could make that more explicit by > returning None. > It's an end condition, I'll change to None and update comment > > + if len(encodedbookmarksize) != expectedsize: > > + raise ValueError( > > + _('cannot decode bookmark size: ' > > + 'expected size: %d, actual size: %d') % > > + (expectedsize, len(encodedbookmarksize))) > > Check "changegroup.readexactly" it does this check for you. > Thanks for the pointer. I noticed that it's used in couple of files already so it's not specific to changegroup. I think it makes sense to move it from changegroup.py to other file (maybe util.py?) What do you think? > > + return intstruct.unpack(encodedbookmarksize)[0] > > + > > +def encodebookmarks(bookmarks): > > + """Encodes bookmark to node mapping to the byte string. > > + > > + Format: <4 bytes - bookmark size><bookmark> > > + <1 byte - 0 if node is empty 1 otherwise><20 bytes node> > > + Node may be 20 byte binary string or empty > > + """ > > + > > + intstruct = struct.Struct('>i') > > + for bookmark, node in sorted(bookmarks.iteritems()): > > + encodedbookmark = encoding.fromlocal(bookmark) > > + yield intstruct.pack(len(encodedbookmark)) > > + yield encodedbookmark > > + if node: > > + if len(node) != 20: > > + raise ValueError(_('node must be 20 or bytes long')) > > Is there case where the content of the node can be wrong ? if not, I > would probably just use an assert. Will fix > > > + yield _NONEMPTYNODE > > + yield node > > + else: > > + yield _EMPTYNODE > > + > > +def decodebookmarks(buf): > > + """Decodes buffer into bookmark to node mapping. > > + > > + Node is either 20 bytes or empty. > > + """ > > + > > + stream = io.BytesIO(buf) > > + bookmarks = {} > > + bookmarksize = _unpackbookmarksize(stream) > > + boolstruct = struct.Struct('?') > > + while bookmarksize: > > + bookmark = stream.read(bookmarksize) > > + if len(bookmark) != bookmarksize: > > + raise ValueError( > > + _('cannot decode bookmark: expected size: %d, ' > > + 'actual size: %d') % (bookmarksize, len(bookmark))) > > CF previous comment about changegroup.readexactly. > > > + bookmark = encoding.tolocal(bookmark) > > + packedemptynodeflag = stream.read(boolstruct.size) > > + emptynode = boolstruct.unpack(packedemptynodeflag)[0] > > + node = '' > > + if not emptynode: > > + node = stream.read(20) > > lalala, changegroup.readexactly. > > > + bookmarks[bookmark] = node > > + bookmarksize = _unpackbookmarksize(stream) > > + return bookmarks > > + > > def _getbkfile(repo): > > """Hook so that extensions that mess with the store can hook bm storage. > > Cheers, >
Excerpts from Stanislau Hlebik's message of 2016-12-20 13:49:37 +0000: > Excerpts from Pierre-Yves David's message of 2016-12-17 08:39:18 +0100: > > > > On 12/09/2016 12:16 PM, Stanislau Hlebik wrote: > > > # HG changeset patch > > > # User Stanislau Hlebik <stash@fb.com> > > > # Date 1481281951 28800 > > > # Fri Dec 09 03:12:31 2016 -0800 > > > # Node ID 001ceadc2bc36699bdf816370899a27203bf1818 > > > # Parent 9e29d4e4e08b5996adda49cdd0b497d89e2b16ee > > > bookmarks: introduce binary encoding > > > > > > Bookmarks binary encoding will be used for `bookmarks` bundle2 part. > > > The format is: <4 bytes - bookmark size, big-endian><bookmark> > > > <1 byte - 0 if node is empty 1 otherwise><20 bytes node> > > > ValueError maybe thrown if input is incorrect. > > > > > > diff --git a/mercurial/bookmarks.py b/mercurial/bookmarks.py > > > --- a/mercurial/bookmarks.py > > > +++ b/mercurial/bookmarks.py > > > @@ -8,7 +8,9 @@ > > > from __future__ import absolute_import > > > > > > import errno > > > +import io > > > import os > > > +import struct > > > > > > from .i18n import _ > > > from .node import ( > > > @@ -23,6 +25,72 @@ > > > util, > > > ) > > > > > > +_NONEMPTYNODE = struct.pack('?', False) > > > +_EMPTYNODE = struct.pack('?', True) > > > > Our constant are still lower case ;-) > > > > > +def _unpackbookmarksize(stream): > > > + """Returns 0 if stream is empty. > > > + """ > > > + > > > + intstruct = struct.Struct('>i') > > > + expectedsize = intstruct.size > > > + encodedbookmarksize = stream.read(expectedsize) > > > + if not encodedbookmarksize: > > > + return 0 > > > > [small nits] > > > > What does this "stream" empty case means? > > > > If this is an error we should probably just error. > > > > If this is an end condition, we could make that more explicit by > > returning None. > > > > It's an end condition, I'll change to None and update comment Oh, and I won't be able to use changegroup.readexactly here because it will throw exception if stream is empty > > > > + if len(encodedbookmarksize) != expectedsize: > > > + raise ValueError( > > > + _('cannot decode bookmark size: ' > > > + 'expected size: %d, actual size: %d') % > > > + (expectedsize, len(encodedbookmarksize))) > > > > Check "changegroup.readexactly" it does this check for you. > > > > Thanks for the pointer. I noticed that it's used in couple of files > already so it's not specific to changegroup. I think it makes sense to > move it from changegroup.py to other file (maybe util.py?) What do you > think? > > > > + return intstruct.unpack(encodedbookmarksize)[0] > > > + > > > +def encodebookmarks(bookmarks): > > > + """Encodes bookmark to node mapping to the byte string. > > > + > > > + Format: <4 bytes - bookmark size><bookmark> > > > + <1 byte - 0 if node is empty 1 otherwise><20 bytes node> > > > + Node may be 20 byte binary string or empty > > > + """ > > > + > > > + intstruct = struct.Struct('>i') > > > + for bookmark, node in sorted(bookmarks.iteritems()): > > > + encodedbookmark = encoding.fromlocal(bookmark) > > > + yield intstruct.pack(len(encodedbookmark)) > > > + yield encodedbookmark > > > + if node: > > > + if len(node) != 20: > > > + raise ValueError(_('node must be 20 or bytes long')) > > > > Is there case where the content of the node can be wrong ? if not, I > > would probably just use an assert. > > Will fix > > > > > > + yield _NONEMPTYNODE > > > + yield node > > > + else: > > > + yield _EMPTYNODE > > > + > > > +def decodebookmarks(buf): > > > + """Decodes buffer into bookmark to node mapping. > > > + > > > + Node is either 20 bytes or empty. > > > + """ > > > + > > > + stream = io.BytesIO(buf) > > > + bookmarks = {} > > > + bookmarksize = _unpackbookmarksize(stream) > > > + boolstruct = struct.Struct('?') > > > + while bookmarksize: > > > + bookmark = stream.read(bookmarksize) > > > + if len(bookmark) != bookmarksize: > > > + raise ValueError( > > > + _('cannot decode bookmark: expected size: %d, ' > > > + 'actual size: %d') % (bookmarksize, len(bookmark))) > > > > CF previous comment about changegroup.readexactly. > > > > > + bookmark = encoding.tolocal(bookmark) > > > + packedemptynodeflag = stream.read(boolstruct.size) > > > + emptynode = boolstruct.unpack(packedemptynodeflag)[0] > > > + node = '' > > > + if not emptynode: > > > + node = stream.read(20) > > > > lalala, changegroup.readexactly. > > > > > + bookmarks[bookmark] = node > > > + bookmarksize = _unpackbookmarksize(stream) > > > + return bookmarks > > > + > > > def _getbkfile(repo): > > > """Hook so that extensions that mess with the store can hook bm storage. > > > > Cheers, > >
Patch
diff --git a/mercurial/bookmarks.py b/mercurial/bookmarks.py --- a/mercurial/bookmarks.py +++ b/mercurial/bookmarks.py @@ -8,7 +8,9 @@ from __future__ import absolute_import import errno +import io import os +import struct from .i18n import _ from .node import ( @@ -23,6 +25,72 @@ util, ) +_NONEMPTYNODE = struct.pack('?', False) +_EMPTYNODE = struct.pack('?', True) + +def _unpackbookmarksize(stream): + """Returns 0 if stream is empty. + """ + + intstruct = struct.Struct('>i') + expectedsize = intstruct.size + encodedbookmarksize = stream.read(expectedsize) + if not encodedbookmarksize: + return 0 + if len(encodedbookmarksize) != expectedsize: + raise ValueError( + _('cannot decode bookmark size: ' + 'expected size: %d, actual size: %d') % + (expectedsize, len(encodedbookmarksize))) + return intstruct.unpack(encodedbookmarksize)[0] + +def encodebookmarks(bookmarks): + """Encodes bookmark to node mapping to the byte string. + + Format: <4 bytes - bookmark size><bookmark> + <1 byte - 0 if node is empty 1 otherwise><20 bytes node> + Node may be 20 byte binary string or empty + """ + + intstruct = struct.Struct('>i') + for bookmark, node in sorted(bookmarks.iteritems()): + encodedbookmark = encoding.fromlocal(bookmark) + yield intstruct.pack(len(encodedbookmark)) + yield encodedbookmark + if node: + if len(node) != 20: + raise ValueError(_('node must be 20 or bytes long')) + yield _NONEMPTYNODE + yield node + else: + yield _EMPTYNODE + +def decodebookmarks(buf): + """Decodes buffer into bookmark to node mapping. + + Node is either 20 bytes or empty. + """ + + stream = io.BytesIO(buf) + bookmarks = {} + bookmarksize = _unpackbookmarksize(stream) + boolstruct = struct.Struct('?') + while bookmarksize: + bookmark = stream.read(bookmarksize) + if len(bookmark) != bookmarksize: + raise ValueError( + _('cannot decode bookmark: expected size: %d, ' + 'actual size: %d') % (bookmarksize, len(bookmark))) + bookmark = encoding.tolocal(bookmark) + packedemptynodeflag = stream.read(boolstruct.size) + emptynode = boolstruct.unpack(packedemptynodeflag)[0] + node = '' + if not emptynode: + node = stream.read(20) + bookmarks[bookmark] = node + bookmarksize = _unpackbookmarksize(stream) + return bookmarks + def _getbkfile(repo): """Hook so that extensions that mess with the store can hook bm storage.