Patchwork bookmarks: introduce binary encoding

login
register
mail settings
Submitter Stanislau Hlebik
Date Dec. 9, 2016, 11:16 a.m.
Message ID <001ceadc2bc36699bdf8.1481282161@devvm957.lla2.facebook.com>
Download mbox | patch
Permalink /patch/17881/
State Changes Requested
Headers show

Comments

Stanislau Hlebik - Dec. 9, 2016, 11:16 a.m.
# HG changeset patch
# User Stanislau Hlebik <stash@fb.com>
# Date 1481281951 28800
#      Fri Dec 09 03:12:31 2016 -0800
# Node ID 001ceadc2bc36699bdf816370899a27203bf1818
# Parent  9e29d4e4e08b5996adda49cdd0b497d89e2b16ee
bookmarks: introduce binary encoding

Bookmarks binary encoding will be used for `bookmarks` bundle2 part.
The format is: <4 bytes - bookmark size, big-endian><bookmark>
               <1 byte - 0 if node is empty 1 otherwise><20 bytes node>
ValueError maybe thrown if input is incorrect.
Pierre-Yves David - Dec. 17, 2016, 7:39 a.m.
On 12/09/2016 12:16 PM, Stanislau Hlebik wrote:
> # HG changeset patch
> # User Stanislau Hlebik <stash@fb.com>
> # Date 1481281951 28800
> #      Fri Dec 09 03:12:31 2016 -0800
> # Node ID 001ceadc2bc36699bdf816370899a27203bf1818
> # Parent  9e29d4e4e08b5996adda49cdd0b497d89e2b16ee
> bookmarks: introduce binary encoding
>
> Bookmarks binary encoding will be used for `bookmarks` bundle2 part.
> The format is: <4 bytes - bookmark size, big-endian><bookmark>
>                <1 byte - 0 if node is empty 1 otherwise><20 bytes node>
> ValueError maybe thrown if input is incorrect.
>
> diff --git a/mercurial/bookmarks.py b/mercurial/bookmarks.py
> --- a/mercurial/bookmarks.py
> +++ b/mercurial/bookmarks.py
> @@ -8,7 +8,9 @@
>  from __future__ import absolute_import
>
>  import errno
> +import io
>  import os
> +import struct
>
>  from .i18n import _
>  from .node import (
> @@ -23,6 +25,72 @@
>      util,
>  )
>
> +_NONEMPTYNODE = struct.pack('?', False)
> +_EMPTYNODE = struct.pack('?', True)

Our constant are still lower case ;-)

> +def _unpackbookmarksize(stream):
> +    """Returns 0 if stream is empty.
> +    """
> +
> +    intstruct = struct.Struct('>i')
> +    expectedsize = intstruct.size
> +    encodedbookmarksize = stream.read(expectedsize)
> +    if not encodedbookmarksize:
> +        return 0

[small nits]

What does this "stream" empty case means?

If this is an error we should probably just error.

If this is an end condition, we could make that more explicit by 
returning None.

> +    if len(encodedbookmarksize) != expectedsize:
> +        raise ValueError(
> +            _('cannot decode bookmark size: '
> +              'expected size: %d, actual size: %d') %
> +            (expectedsize, len(encodedbookmarksize)))

Check "changegroup.readexactly" it does this check for you.

> +    return intstruct.unpack(encodedbookmarksize)[0]
> +
> +def encodebookmarks(bookmarks):
> +    """Encodes bookmark to node mapping to the byte string.
> +
> +    Format: <4 bytes - bookmark size><bookmark>
> +            <1 byte - 0 if node is empty 1 otherwise><20 bytes node>
> +    Node may be 20 byte binary string or empty
> +    """
> +
> +    intstruct = struct.Struct('>i')
> +    for bookmark, node in sorted(bookmarks.iteritems()):
> +        encodedbookmark = encoding.fromlocal(bookmark)
> +        yield intstruct.pack(len(encodedbookmark))
> +        yield encodedbookmark
> +        if node:
> +            if len(node) != 20:
> +                raise ValueError(_('node must be 20 or bytes long'))

Is there case where the content  of the node can be wrong ? if not, I 
would probably just use an assert.

> +            yield _NONEMPTYNODE
> +            yield node
> +        else:
> +            yield _EMPTYNODE
> +
> +def decodebookmarks(buf):
> +    """Decodes buffer into bookmark to node mapping.
> +
> +    Node is either 20 bytes or empty.
> +    """
> +
> +    stream = io.BytesIO(buf)
> +    bookmarks = {}
> +    bookmarksize = _unpackbookmarksize(stream)
> +    boolstruct = struct.Struct('?')
> +    while bookmarksize:
> +        bookmark = stream.read(bookmarksize)
> +        if len(bookmark) != bookmarksize:
> +            raise ValueError(
> +                _('cannot decode bookmark: expected size: %d, '
> +                'actual size: %d') % (bookmarksize, len(bookmark)))

CF previous comment about changegroup.readexactly.

> +        bookmark = encoding.tolocal(bookmark)
> +        packedemptynodeflag = stream.read(boolstruct.size)
> +        emptynode = boolstruct.unpack(packedemptynodeflag)[0]
> +        node = ''
> +        if not emptynode:
> +            node = stream.read(20)

lalala, changegroup.readexactly.

> +        bookmarks[bookmark] = node
> +        bookmarksize = _unpackbookmarksize(stream)
> +    return bookmarks
> +
>  def _getbkfile(repo):
>      """Hook so that extensions that mess with the store can hook bm storage.

Cheers,
Stanislau Hlebik - Dec. 20, 2016, 1:49 p.m.
Excerpts from Pierre-Yves David's message of 2016-12-17 08:39:18 +0100:
> 
> On 12/09/2016 12:16 PM, Stanislau Hlebik wrote:
> > # HG changeset patch
> > # User Stanislau Hlebik <stash@fb.com>
> > # Date 1481281951 28800
> > #      Fri Dec 09 03:12:31 2016 -0800
> > # Node ID 001ceadc2bc36699bdf816370899a27203bf1818
> > # Parent  9e29d4e4e08b5996adda49cdd0b497d89e2b16ee
> > bookmarks: introduce binary encoding
> >
> > Bookmarks binary encoding will be used for `bookmarks` bundle2 part.
> > The format is: <4 bytes - bookmark size, big-endian><bookmark>
> >                <1 byte - 0 if node is empty 1 otherwise><20 bytes node>
> > ValueError maybe thrown if input is incorrect.
> >
> > diff --git a/mercurial/bookmarks.py b/mercurial/bookmarks.py
> > --- a/mercurial/bookmarks.py
> > +++ b/mercurial/bookmarks.py
> > @@ -8,7 +8,9 @@
> >  from __future__ import absolute_import
> >
> >  import errno
> > +import io
> >  import os
> > +import struct
> >
> >  from .i18n import _
> >  from .node import (
> > @@ -23,6 +25,72 @@
> >      util,
> >  )
> >
> > +_NONEMPTYNODE = struct.pack('?', False)
> > +_EMPTYNODE = struct.pack('?', True)
> 
> Our constant are still lower case ;-)
> 
> > +def _unpackbookmarksize(stream):
> > +    """Returns 0 if stream is empty.
> > +    """
> > +
> > +    intstruct = struct.Struct('>i')
> > +    expectedsize = intstruct.size
> > +    encodedbookmarksize = stream.read(expectedsize)
> > +    if not encodedbookmarksize:
> > +        return 0
> 
> [small nits]
> 
> What does this "stream" empty case means?
> 
> If this is an error we should probably just error.
> 
> If this is an end condition, we could make that more explicit by 
> returning None.
>

It's an end condition, I'll change to None and update comment

> > +    if len(encodedbookmarksize) != expectedsize:
> > +        raise ValueError(
> > +            _('cannot decode bookmark size: '
> > +              'expected size: %d, actual size: %d') %
> > +            (expectedsize, len(encodedbookmarksize)))
> 
> Check "changegroup.readexactly" it does this check for you.
>

Thanks for the pointer. I noticed that it's used in couple of files
already so it's not specific to changegroup. I think it makes sense to
move it from changegroup.py to other file (maybe util.py?) What do you
think?

> > +    return intstruct.unpack(encodedbookmarksize)[0]
> > +
> > +def encodebookmarks(bookmarks):
> > +    """Encodes bookmark to node mapping to the byte string.
> > +
> > +    Format: <4 bytes - bookmark size><bookmark>
> > +            <1 byte - 0 if node is empty 1 otherwise><20 bytes node>
> > +    Node may be 20 byte binary string or empty
> > +    """
> > +
> > +    intstruct = struct.Struct('>i')
> > +    for bookmark, node in sorted(bookmarks.iteritems()):
> > +        encodedbookmark = encoding.fromlocal(bookmark)
> > +        yield intstruct.pack(len(encodedbookmark))
> > +        yield encodedbookmark
> > +        if node:
> > +            if len(node) != 20:
> > +                raise ValueError(_('node must be 20 or bytes long'))
> 
> Is there case where the content  of the node can be wrong ? if not, I 
> would probably just use an assert.

Will fix

> 
> > +            yield _NONEMPTYNODE
> > +            yield node
> > +        else:
> > +            yield _EMPTYNODE
> > +
> > +def decodebookmarks(buf):
> > +    """Decodes buffer into bookmark to node mapping.
> > +
> > +    Node is either 20 bytes or empty.
> > +    """
> > +
> > +    stream = io.BytesIO(buf)
> > +    bookmarks = {}
> > +    bookmarksize = _unpackbookmarksize(stream)
> > +    boolstruct = struct.Struct('?')
> > +    while bookmarksize:
> > +        bookmark = stream.read(bookmarksize)
> > +        if len(bookmark) != bookmarksize:
> > +            raise ValueError(
> > +                _('cannot decode bookmark: expected size: %d, '
> > +                'actual size: %d') % (bookmarksize, len(bookmark)))
> 
> CF previous comment about changegroup.readexactly.
> 
> > +        bookmark = encoding.tolocal(bookmark)
> > +        packedemptynodeflag = stream.read(boolstruct.size)
> > +        emptynode = boolstruct.unpack(packedemptynodeflag)[0]
> > +        node = ''
> > +        if not emptynode:
> > +            node = stream.read(20)
> 
> lalala, changegroup.readexactly.
> 
> > +        bookmarks[bookmark] = node
> > +        bookmarksize = _unpackbookmarksize(stream)
> > +    return bookmarks
> > +
> >  def _getbkfile(repo):
> >      """Hook so that extensions that mess with the store can hook bm storage.
> 
> Cheers,
>
Stanislau Hlebik - Dec. 20, 2016, 1:55 p.m.
Excerpts from Stanislau Hlebik's message of 2016-12-20 13:49:37 +0000:
> Excerpts from Pierre-Yves David's message of 2016-12-17 08:39:18 +0100:
> > 
> > On 12/09/2016 12:16 PM, Stanislau Hlebik wrote:
> > > # HG changeset patch
> > > # User Stanislau Hlebik <stash@fb.com>
> > > # Date 1481281951 28800
> > > #      Fri Dec 09 03:12:31 2016 -0800
> > > # Node ID 001ceadc2bc36699bdf816370899a27203bf1818
> > > # Parent  9e29d4e4e08b5996adda49cdd0b497d89e2b16ee
> > > bookmarks: introduce binary encoding
> > >
> > > Bookmarks binary encoding will be used for `bookmarks` bundle2 part.
> > > The format is: <4 bytes - bookmark size, big-endian><bookmark>
> > >                <1 byte - 0 if node is empty 1 otherwise><20 bytes node>
> > > ValueError maybe thrown if input is incorrect.
> > >
> > > diff --git a/mercurial/bookmarks.py b/mercurial/bookmarks.py
> > > --- a/mercurial/bookmarks.py
> > > +++ b/mercurial/bookmarks.py
> > > @@ -8,7 +8,9 @@
> > >  from __future__ import absolute_import
> > >
> > >  import errno
> > > +import io
> > >  import os
> > > +import struct
> > >
> > >  from .i18n import _
> > >  from .node import (
> > > @@ -23,6 +25,72 @@
> > >      util,
> > >  )
> > >
> > > +_NONEMPTYNODE = struct.pack('?', False)
> > > +_EMPTYNODE = struct.pack('?', True)
> > 
> > Our constant are still lower case ;-)
> > 
> > > +def _unpackbookmarksize(stream):
> > > +    """Returns 0 if stream is empty.
> > > +    """
> > > +
> > > +    intstruct = struct.Struct('>i')
> > > +    expectedsize = intstruct.size
> > > +    encodedbookmarksize = stream.read(expectedsize)
> > > +    if not encodedbookmarksize:
> > > +        return 0
> > 
> > [small nits]
> > 
> > What does this "stream" empty case means?
> > 
> > If this is an error we should probably just error.
> > 
> > If this is an end condition, we could make that more explicit by 
> > returning None.
> >
> 
> It's an end condition, I'll change to None and update comment

Oh, and I won't be able to use changegroup.readexactly here because it
will throw exception if stream is empty

> 
> > > +    if len(encodedbookmarksize) != expectedsize:
> > > +        raise ValueError(
> > > +            _('cannot decode bookmark size: '
> > > +              'expected size: %d, actual size: %d') %
> > > +            (expectedsize, len(encodedbookmarksize)))
> > 
> > Check "changegroup.readexactly" it does this check for you.
> >
> 
> Thanks for the pointer. I noticed that it's used in couple of files
> already so it's not specific to changegroup. I think it makes sense to
> move it from changegroup.py to other file (maybe util.py?) What do you
> think?
> 
> > > +    return intstruct.unpack(encodedbookmarksize)[0]
> > > +
> > > +def encodebookmarks(bookmarks):
> > > +    """Encodes bookmark to node mapping to the byte string.
> > > +
> > > +    Format: <4 bytes - bookmark size><bookmark>
> > > +            <1 byte - 0 if node is empty 1 otherwise><20 bytes node>
> > > +    Node may be 20 byte binary string or empty
> > > +    """
> > > +
> > > +    intstruct = struct.Struct('>i')
> > > +    for bookmark, node in sorted(bookmarks.iteritems()):
> > > +        encodedbookmark = encoding.fromlocal(bookmark)
> > > +        yield intstruct.pack(len(encodedbookmark))
> > > +        yield encodedbookmark
> > > +        if node:
> > > +            if len(node) != 20:
> > > +                raise ValueError(_('node must be 20 or bytes long'))
> > 
> > Is there case where the content  of the node can be wrong ? if not, I 
> > would probably just use an assert.
> 
> Will fix
> 
> > 
> > > +            yield _NONEMPTYNODE
> > > +            yield node
> > > +        else:
> > > +            yield _EMPTYNODE
> > > +
> > > +def decodebookmarks(buf):
> > > +    """Decodes buffer into bookmark to node mapping.
> > > +
> > > +    Node is either 20 bytes or empty.
> > > +    """
> > > +
> > > +    stream = io.BytesIO(buf)
> > > +    bookmarks = {}
> > > +    bookmarksize = _unpackbookmarksize(stream)
> > > +    boolstruct = struct.Struct('?')
> > > +    while bookmarksize:
> > > +        bookmark = stream.read(bookmarksize)
> > > +        if len(bookmark) != bookmarksize:
> > > +            raise ValueError(
> > > +                _('cannot decode bookmark: expected size: %d, '
> > > +                'actual size: %d') % (bookmarksize, len(bookmark)))
> > 
> > CF previous comment about changegroup.readexactly.
> > 
> > > +        bookmark = encoding.tolocal(bookmark)
> > > +        packedemptynodeflag = stream.read(boolstruct.size)
> > > +        emptynode = boolstruct.unpack(packedemptynodeflag)[0]
> > > +        node = ''
> > > +        if not emptynode:
> > > +            node = stream.read(20)
> > 
> > lalala, changegroup.readexactly.
> > 
> > > +        bookmarks[bookmark] = node
> > > +        bookmarksize = _unpackbookmarksize(stream)
> > > +    return bookmarks
> > > +
> > >  def _getbkfile(repo):
> > >      """Hook so that extensions that mess with the store can hook bm storage.
> > 
> > Cheers,
> >

Patch

diff --git a/mercurial/bookmarks.py b/mercurial/bookmarks.py
--- a/mercurial/bookmarks.py
+++ b/mercurial/bookmarks.py
@@ -8,7 +8,9 @@ 
 from __future__ import absolute_import
 
 import errno
+import io
 import os
+import struct
 
 from .i18n import _
 from .node import (
@@ -23,6 +25,72 @@ 
     util,
 )
 
+_NONEMPTYNODE = struct.pack('?', False)
+_EMPTYNODE = struct.pack('?', True)
+
+def _unpackbookmarksize(stream):
+    """Returns 0 if stream is empty.
+    """
+
+    intstruct = struct.Struct('>i')
+    expectedsize = intstruct.size
+    encodedbookmarksize = stream.read(expectedsize)
+    if not encodedbookmarksize:
+        return 0
+    if len(encodedbookmarksize) != expectedsize:
+        raise ValueError(
+            _('cannot decode bookmark size: '
+              'expected size: %d, actual size: %d') %
+            (expectedsize, len(encodedbookmarksize)))
+    return intstruct.unpack(encodedbookmarksize)[0]
+
+def encodebookmarks(bookmarks):
+    """Encodes bookmark to node mapping to the byte string.
+
+    Format: <4 bytes - bookmark size><bookmark>
+            <1 byte - 0 if node is empty 1 otherwise><20 bytes node>
+    Node may be 20 byte binary string or empty
+    """
+
+    intstruct = struct.Struct('>i')
+    for bookmark, node in sorted(bookmarks.iteritems()):
+        encodedbookmark = encoding.fromlocal(bookmark)
+        yield intstruct.pack(len(encodedbookmark))
+        yield encodedbookmark
+        if node:
+            if len(node) != 20:
+                raise ValueError(_('node must be 20 or bytes long'))
+            yield _NONEMPTYNODE
+            yield node
+        else:
+            yield _EMPTYNODE
+
+def decodebookmarks(buf):
+    """Decodes buffer into bookmark to node mapping.
+
+    Node is either 20 bytes or empty.
+    """
+
+    stream = io.BytesIO(buf)
+    bookmarks = {}
+    bookmarksize = _unpackbookmarksize(stream)
+    boolstruct = struct.Struct('?')
+    while bookmarksize:
+        bookmark = stream.read(bookmarksize)
+        if len(bookmark) != bookmarksize:
+            raise ValueError(
+                _('cannot decode bookmark: expected size: %d, '
+                'actual size: %d') % (bookmarksize, len(bookmark)))
+        bookmark = encoding.tolocal(bookmark)
+        packedemptynodeflag = stream.read(boolstruct.size)
+        emptynode = boolstruct.unpack(packedemptynodeflag)[0]
+        node = ''
+        if not emptynode:
+            node = stream.read(20)
+        bookmarks[bookmark] = node
+        bookmarksize = _unpackbookmarksize(stream)
+    return bookmarks
+
 def _getbkfile(repo):
     """Hook so that extensions that mess with the store can hook bm storage.