Patchwork [02,of,15,V2] bookmark: add methods to binary encode and decode bookmark values

login
register
mail settings
Submitter Boris Feld
Date Nov. 2, 2017, 1:17 p.m.
Message ID <4d0c6772a81aa1e2b25f.1509628679@FB>
Download mbox | patch
Permalink /patch/25331/
State Accepted
Headers show

Comments

Boris Feld - Nov. 2, 2017, 1:17 p.m.
# HG changeset patch
# User Boris Feld <boris.feld@octobus.net>
# Date 1508072395 -7200
#      Sun Oct 15 14:59:55 2017 +0200
# Node ID 4d0c6772a81aa1e2b25f32f944563db1f33fd327
# Parent  8c9a9eecdcd61401a1604a08a5272f7dabd4b912
# EXP-Topic b2.bookmarks
# Available At https://bitbucket.org/octobus/mercurial-devel/
#              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 4d0c6772a81a
bookmark: add methods to binary encode and decode bookmark values

Coming new bundle2 parts related to bookmark will use a binary encoding. It
encodes a series of '(bookmark, node)' pairs. Bookmark name has a high enough
size limit to not be affected by issue5165. (64K length, we are well covered)
Augie Fackler - Nov. 10, 2017, 10:35 p.m.
(+indygreg, who also is a formats enthusiast)

On Thu, Nov 02, 2017 at 02:17:59PM +0100, Boris Feld wrote:
> # HG changeset patch
> # User Boris Feld <boris.feld@octobus.net>
> # Date 1508072395 -7200
> #      Sun Oct 15 14:59:55 2017 +0200
> # Node ID 4d0c6772a81aa1e2b25f32f944563db1f33fd327
> # Parent  8c9a9eecdcd61401a1604a08a5272f7dabd4b912
> # EXP-Topic b2.bookmarks
> # Available At https://bitbucket.org/octobus/mercurial-devel/
> #              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 4d0c6772a81a
> bookmark: add methods to binary encode and decode bookmark values
>
> Coming new bundle2 parts related to bookmark will use a binary encoding. It
> encodes a series of '(bookmark, node)' pairs. Bookmark name has a high enough
> size limit to not be affected by issue5165. (64K length, we are well covered)

I'm not thrilled here. Could we do some sort of varint encoding, which
would be generally useful going forward for a variety of things,
rather than just shrugging and setting a limit which we (today) deem
absurdly large?

I agree that practically speaking, nobody should have a 64k bookmark,
but we can do better. We also know that \0 shouldn't appear in a
bookmark name, so we could just make the format be
null-terminated. I'd much rather we get in the practice of
deliberately crafting formats that are maximally flexible and
reusable.

IOW, encode something like this:

struct {
  node [20]byte
  name [] byte // continues until you see the first NUL
}

Greg, am I talking crazy?


>
> diff --git a/mercurial/bookmarks.py b/mercurial/bookmarks.py
> --- a/mercurial/bookmarks.py
> +++ b/mercurial/bookmarks.py
> @@ -8,12 +8,14 @@
>  from __future__ import absolute_import
>
>  import errno
> +import struct
>
>  from .i18n import _
>  from .node import (
>      bin,
>      hex,
>      short,
> +    wdirid,
>  )
>  from . import (
>      encoding,
> @@ -550,6 +552,60 @@ def unhexlifybookmarks(marks):
>          binremotemarks[name] = bin(node)
>      return binremotemarks
>
> +_binaryentry = struct.Struct('>20sH')
> +
> +def binaryencode(bookmarks):
> +    """encode a '(bookmark, node)' iterable into a binary stream
> +
> +    the binary format is:
> +
> +        <node><bookmark-length><bookmark-name>
> +
> +    :node: is a 20 bytes binary node,
> +    :bookmark-length: an unsigned short,
> +    :bookmark-name: the name of the bookmark (of length <bookmark-length>)
> +
> +    wdirid (all bits set) will be used as a special value for "missing"
> +    """
> +    binarydata = []
> +    for book, node in bookmarks:
> +        if not node: # None or ''
> +            node = wdirid
> +        binarydata.append(_binaryentry.pack(node, len(book)))
> +        binarydata.append(book)
> +    return ''.join(binarydata)
> +
> +def binarydecode(stream):
> +    """decode a binary stream into an '(bookmark, node)' iterable
> +
> +    the binary format is:
> +
> +        <node><bookmark-length><bookmark-name>
> +
> +    :node: is a 20 bytes binary node,
> +    :bookmark-length: an unsigned short,
> +    :bookmark-name: the name of the bookmark (of length <bookmark-length>))
> +
> +    wdirid (all bits set) will be used as a special value for "missing"
> +    """
> +    entrysize = _binaryentry.size
> +    books = []
> +    while True:
> +        entry = stream.read(entrysize)
> +        if len(entry) < entrysize:
> +            if entry:
> +                raise error.Abort(_('bad bookmark stream'))
> +            break
> +        node, length = _binaryentry.unpack(entry)
> +        bookmark = stream.read(length)
> +        if len(bookmark) < length:
> +            if entry:
> +                raise error.Abort(_('bad bookmark stream'))
> +        if node == wdirid:
> +            node = None
> +        books.append((bookmark, node))
> +    return books
> +
>  def updatefromremote(ui, repo, remotemarks, path, trfunc, explicit=()):
>      ui.debug("checking for updated bookmarks\n")
>      localmarks = repo._bookmarks
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Boris Feld - Nov. 13, 2017, 10:16 a.m.
On Fri, 2017-11-10 at 17:35 -0500, Augie Fackler wrote:
> (+indygreg, who also is a formats enthusiast)
> 
> On Thu, Nov 02, 2017 at 02:17:59PM +0100, Boris Feld wrote:
> > # HG changeset patch
> > # User Boris Feld <boris.feld@octobus.net>
> > # Date 1508072395 -7200
> > #      Sun Oct 15 14:59:55 2017 +0200
> > # Node ID 4d0c6772a81aa1e2b25f32f944563db1f33fd327
> > # Parent  8c9a9eecdcd61401a1604a08a5272f7dabd4b912
> > # EXP-Topic b2.bookmarks
> > # Available At https://bitbucket.org/octobus/mercurial-devel/
> > #              hg pull https://bitbucket.org/octobus/mercurial-deve
> > l/ -r 4d0c6772a81a
> > bookmark: add methods to binary encode and decode bookmark values
> > 
> > Coming new bundle2 parts related to bookmark will use a binary
> > encoding. It
> > encodes a series of '(bookmark, node)' pairs. Bookmark name has a
> > high enough
> > size limit to not be affected by issue5165. (64K length, we are
> > well covered)
> 
> I'm not thrilled here. Could we do some sort of varint encoding,
> which
> would be generally useful going forward for a variety of things,
> rather than just shrugging and setting a limit which we (today) deem
> absurdly large?

We are not sure what the value of that would be. Bundle2 makes it very
easy to move to a newer encoding version if needed. We are already
doing so for changegroup and obsmarkers.

The biggest bookmark we have seen was about 800 characters; that should
leave us some room for the future without blocking us.

> 
> I agree that practically speaking, nobody should have a 64k bookmark,
> but we can do better. We also know that \0 shouldn't appear in a
> bookmark name, so we could just make the format be
> null-terminated. I'd much rather we get in the practice of
> deliberately crafting formats that are maximally flexible and
> reusable.

That would be significantly harder to parse since we would need to
search for the '\0' in the stream. We do not think it brings much value
either since any new data we sneak into the last field would have to be
understood by the receiver. If we need to negotiate for such
capabilities, this is equivalent to rolling out a new encoding.

> 
> IOW, encode something like this:
> 
> struct {
>   node [20]byte
>   name [] byte // continues until you see the first NUL
> }
> 
> Greg, am I talking crazy?
> 
> 
> > 
> > diff --git a/mercurial/bookmarks.py b/mercurial/bookmarks.py
> > --- a/mercurial/bookmarks.py
> > +++ b/mercurial/bookmarks.py
> > @@ -8,12 +8,14 @@
> >  from __future__ import absolute_import
> > 
> >  import errno
> > +import struct
> > 
> >  from .i18n import _
> >  from .node import (
> >      bin,
> >      hex,
> >      short,
> > +    wdirid,
> >  )
> >  from . import (
> >      encoding,
> > @@ -550,6 +552,60 @@ def unhexlifybookmarks(marks):
> >          binremotemarks[name] = bin(node)
> >      return binremotemarks
> > 
> > +_binaryentry = struct.Struct('>20sH')
> > +
> > +def binaryencode(bookmarks):
> > +    """encode a '(bookmark, node)' iterable into a binary stream
> > +
> > +    the binary format is:
> > +
> > +        <node><bookmark-length><bookmark-name>
> > +
> > +    :node: is a 20 bytes binary node,
> > +    :bookmark-length: an unsigned short,
> > +    :bookmark-name: the name of the bookmark (of length <bookmark-
> > length>)
> > +
> > +    wdirid (all bits set) will be used as a special value for
> > "missing"
> > +    """
> > +    binarydata = []
> > +    for book, node in bookmarks:
> > +        if not node: # None or ''
> > +            node = wdirid
> > +        binarydata.append(_binaryentry.pack(node, len(book)))
> > +        binarydata.append(book)
> > +    return ''.join(binarydata)
> > +
> > +def binarydecode(stream):
> > +    """decode a binary stream into an '(bookmark, node)' iterable
> > +
> > +    the binary format is:
> > +
> > +        <node><bookmark-length><bookmark-name>
> > +
> > +    :node: is a 20 bytes binary node,
> > +    :bookmark-length: an unsigned short,
> > +    :bookmark-name: the name of the bookmark (of length <bookmark-
> > length>))
> > +
> > +    wdirid (all bits set) will be used as a special value for
> > "missing"
> > +    """
> > +    entrysize = _binaryentry.size
> > +    books = []
> > +    while True:
> > +        entry = stream.read(entrysize)
> > +        if len(entry) < entrysize:
> > +            if entry:
> > +                raise error.Abort(_('bad bookmark stream'))
> > +            break
> > +        node, length = _binaryentry.unpack(entry)
> > +        bookmark = stream.read(length)
> > +        if len(bookmark) < length:
> > +            if entry:
> > +                raise error.Abort(_('bad bookmark stream'))
> > +        if node == wdirid:
> > +            node = None
> > +        books.append((bookmark, node))
> > +    return books
> > +
> >  def updatefromremote(ui, repo, remotemarks, path, trfunc,
> > explicit=()):
> >      ui.debug("checking for updated bookmarks\n")
> >      localmarks = repo._bookmarks
> > _______________________________________________
> > Mercurial-devel mailing list
> > Mercurial-devel@mercurial-scm.org
> > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Augie Fackler - Nov. 13, 2017, 11:30 p.m.
> On Nov 13, 2017, at 05:16, Boris Feld <boris.feld@octobus.net> wrote:
> 
> On Fri, 2017-11-10 at 17:35 -0500, Augie Fackler wrote:
>> (+indygreg, who also is a formats enthusiast)
>> 
>> On Thu, Nov 02, 2017 at 02:17:59PM +0100, Boris Feld wrote:
>>> # HG changeset patch
>>> # User Boris Feld <boris.feld@octobus.net>
>>> # Date 1508072395 -7200
>>> #      Sun Oct 15 14:59:55 2017 +0200
>>> # Node ID 4d0c6772a81aa1e2b25f32f944563db1f33fd327
>>> # Parent  8c9a9eecdcd61401a1604a08a5272f7dabd4b912
>>> # EXP-Topic b2.bookmarks
>>> # Available At https://bitbucket.org/octobus/mercurial-devel/
>>> #              hg pull https://bitbucket.org/octobus/mercurial-deve
>>> l/ -r 4d0c6772a81a
>>> bookmark: add methods to binary encode and decode bookmark values
>>> 
>>> Coming new bundle2 parts related to bookmark will use a binary
>>> encoding. It
>>> encodes a series of '(bookmark, node)' pairs. Bookmark name has a
>>> high enough
>>> size limit to not be affected by issue5165. (64K length, we are
>>> well covered)
>> 
>> I'm not thrilled here. Could we do some sort of varint encoding,
>> which
>> would be generally useful going forward for a variety of things,
>> rather than just shrugging and setting a limit which we (today) deem
>> absurdly large?
> 
> We are not sure what the value of that would be. Bundle2 makes it very
> easy to move to a newer encoding version if needed.

I mean, not really. We have to add a b2cap, and then we've got to maintain multiple versions forever. It's kind of a pain. A varint encoding wouldn't be hard, and will probably take O(an hour) to code up. I'd entertain that much more eagerly than just dropping a uint64 in here and asserting it should be big enough forever.

> We are already
> doing so for changegroup and obsmarkers.
> 
> The biggest bookmark we have seen was about 800 characters, that should
> leave us some room for the future without blocking us.

That you know about. I seem to remember violent arguments that 255 bytes should be more than enough. I agree a 64k bookmark sounds totally insane, but what if I wanted to put base64'd test result metadata in a bookmark?

> 
>> 
>> I agree that practically speaking, nobody should have a 64k bookmark,
>> but we can do better. We also know that \0 shouldn't appear in a
>> bookmark name, so we could just make the format be
>> null-terminated. I'd much rather we get in the practice of
>> deliberately crafting formats that are maximally flexible and
>> reusable.
> 
> That would be significantly harder to parse since we would need to
> search for the '\0' in the stream. We do not think it brings much value
> either since any new data we sneak into the last field would have to be
> understood by the receiver. If we need to negotiate for such
> capabilities, this is equivalent to rolling out a new encoding.

Then let's explore the varint option. Here's a rough sketch of a reader:

import io
src = io.BytesIO(b'\xff\x01')
cur = ord(src.read(1))
i = cur & 0x7f
while cur & 0x80:
    i = i << 7
    cur = ord(src.read(1))
    i += cur & 0x7f
assert 0b11111110000001 == i

Gives you 7 bits of value per byte transmitted, which means that we'll get most realistic bookmark lengths in two bytes of "here's how much string" (probably one byte for the vast majority of ascii cases!)

I'd much rather we just build up the infrastructure for stuff like this _now_ and start using it, rather than continue to punt and shove massive uint64 fields in and then pray it'll be enough for all eternity. It does mean we can't use struct anymore, but that seems like a comparatively small price to pay for being able to trivially make this future proof.

(As before, if other reviewers think I'm being too hard on this, please speak up.)
Boris Feld - Nov. 16, 2017, 5:59 p.m.
On Mon, 2017-11-13 at 18:30 -0500, Augie Fackler wrote:
> > On Nov 13, 2017, at 05:16, Boris Feld <boris.feld@octobus.net>
> > wrote:
> > 
> > On Fri, 2017-11-10 at 17:35 -0500, Augie Fackler wrote:
> > > (+indygreg, who also is a formats enthusiast)
> > > 
> > > On Thu, Nov 02, 2017 at 02:17:59PM +0100, Boris Feld wrote:
> > > > # HG changeset patch
> > > > # User Boris Feld <boris.feld@octobus.net>
> > > > # Date 1508072395 -7200
> > > > #      Sun Oct 15 14:59:55 2017 +0200
> > > > # Node ID 4d0c6772a81aa1e2b25f32f944563db1f33fd327
> > > > # Parent  8c9a9eecdcd61401a1604a08a5272f7dabd4b912
> > > > # EXP-Topic b2.bookmarks
> > > > # Available At https://bitbucket.org/octobus/mercurial-devel/
> > > > #              hg pull https://bitbucket.org/octobus/mercurial-
> > > > deve
> > > > l/ -r 4d0c6772a81a
> > > > bookmark: add methods to binary encode and decode bookmark
> > > > values
> > > > 
> > > > Coming new bundle2 parts related to bookmark will use a binary
> > > > encoding. It
> > > > encodes a series of '(bookmark, node)' pairs. Bookmark name has
> > > > a
> > > > high enough
> > > > size limit to not be affected by issue5165. (64K length, we are
> > > > well covered)
> > > 
> > > I'm not thrilled here. Could we do some sort of varint encoding,
> > > which
> > > would be generally useful going forward for a variety of things,
> > > rather than just shrugging and setting a limit which we (today)
> > > deem
> > > absurdly large?
> > 
> > We are not sure what the value of that would be. Bundle2 makes it
> > very
> > easy to move to a newer encoding version if needed.
> 
> I mean, not really. We have to add a b2cap, and then we've got to
> maintain multiple versions forever. It's kind of a pain. A varint
> encoding wouldn't be hard, and will probably take O(an hour) to code
> up. I'd entertain that much more eagerly than just dropping a uint64
> in here and asserting it should be big enough forever.
> 
> > We are already
> > doing so for changegroup and obsmarkers.
> > 
> > The biggest bookmark we have seen was about 800 characters, that
> > should
> > leave us some room for the future without blocking us.
> 
> That you know about. I seem to remember violent arguments that 255
> bytes should be more than enough. I agree a 64k bookmark sounds
> totally insane, but what if I wanted to put base64'd test result
> metadata in a bookmark?
> 
> > 
> > > 
> > > I agree that practically speaking, nobody should have a 64k
> > > bookmark,
> > > but we can do better. We also know that \0 shouldn't appear in a
> > > bookmark name, so we could just make the format be
> > > null-terminated. I'd much rather we get in the practice of
> > > deliberately crafting formats that are maximally flexible and
> > > reusable.
> > 
> > That would be significantly harder to parse since we would need to
> > search for the '\0' in the stream. We do not think it brings much
> > value
> > either since any new data we sneak into the last field would have
> > to be
> > understood by the receiver. If we need to negotiate for such
> > capabilities, this is equivalent to rolling out a new encoding.
> 
> Then let's explore the varint option. Here's a rough sketch of a
> reader:
> 
> import io
> src = io.BytesIO(b'\xff\x01')
> cur = ord(src.read(1))
> i = cur & 0x7f
> while cur & 0x80:
>     i = i << 7
>     cur = ord(src.read(1))
>     i += cur & 0x7f
> assert 0b11111110000001 == i
> 
> Gives you 7 bits of value per byte transmitted, which means that
> we'll get most realistic bookmark lengths in two bytes of "here's how
> much string" (probably one byte for the vast majority of ascii
> cases!)
> 
> I'd much rather we just build up the infrastructure for stuff like
> this _now_ and start using it, rather than continue to punt and shove
> massive uint64 fields in and then pray it'll be enough for all
> eternity. It does mean we can't use struct anymore, but that seems
> like a comparatively small price to pay for being able to trivially
> make this future proof.
> 
> (As before, if other reviewers think I'm being too hard on this,
> please speak up.)

The current exchange implementation is already putting some soft
limits, the HTTP header size limit (usually around 8K). If people tried
exchanging more than 8K bookmarks over HTTP, they would have seen an
error and we would have seen tickets on Bugzilla.

Bookmarks are labels on changesets to help track lines of development,
so encoding the results of the CI as bookmark seems to fall outside
what bookmarks are designed for. As such, we think we should not
support this use-case, at least not with bookmarks.

If we want bookmarks larger than 64K, we will need to fix several other
problems in order to have decent performance and UI before supporting
bigger bookmarks decently.

As bookmarks are labels, they are displayed in various UIs, both in
Mercurial and third-party interfaces (GUI, hosting). I'm pretty sure
these interfaces were not designed for such long bookmarks. And
designing an interface for an arbitrary-length bookmark name is not an
easy task. I tested adding a 64K bookmark in one of my repositories and
thg crashed; I cannot view this repository even after removing the
bookmark and restarting thg.

Also, as they are labels, they are loaded very often. Arbitrary-length
bookmark names will require adjustments to the bookmark loading code and
careful attention to memory usage.

We would prefer not to block this bugfix on testing varint; if a real
use-case appears for bigger bookmarks, we will be happy to take care of
updating the needed code.

The varint idea seems interesting on its own, but we don't think we
should use it here; the extra complexity doesn't seem worthwhile in
this case.
Sean Farley - Nov. 17, 2017, 9:24 p.m.
Augie Fackler <raf@durin42.com> writes:

>> On Nov 13, 2017, at 05:16, Boris Feld <boris.feld@octobus.net> wrote:
>>
>> On Fri, 2017-11-10 at 17:35 -0500, Augie Fackler wrote:
>>> (+indygreg, who also is a formats enthusiast)
>>>
>>> On Thu, Nov 02, 2017 at 02:17:59PM +0100, Boris Feld wrote:
>>>> # HG changeset patch
>>>> # User Boris Feld <boris.feld@octobus.net>
>>>> # Date 1508072395 -7200
>>>> #      Sun Oct 15 14:59:55 2017 +0200
>>>> # Node ID 4d0c6772a81aa1e2b25f32f944563db1f33fd327
>>>> # Parent  8c9a9eecdcd61401a1604a08a5272f7dabd4b912
>>>> # EXP-Topic b2.bookmarks
>>>> # Available At https://bitbucket.org/octobus/mercurial-devel/
>>>> #              hg pull https://bitbucket.org/octobus/mercurial-deve
>>>> l/ -r 4d0c6772a81a
>>>> bookmark: add methods to binary encode and decode bookmark values
>>>>
>>>> Coming new bundle2 parts related to bookmark will use a binary
>>>> encoding. It
>>>> encodes a series of '(bookmark, node)' pairs. Bookmark name has a
>>>> high enough
>>>> size limit to not be affected by issue5165. (64K length, we are
>>>> well covered)
>>>
>>> I'm not thrilled here. Could we do some sort of varint encoding,
>>> which
>>> would be generally useful going forward for a variety of things,
>>> rather than just shrugging and setting a limit which we (today) deem
>>> absurdly large?
>>
>> We are not sure what the value of that would be. Bundle2 makes it very
>> easy to move to a newer encoding version if needed.
>
> I mean, not really. We have to add a b2cap, and then we've got to maintain multiple versions forever. It's kind of a pain. A varint encoding wouldn't be hard, and will probably take O(an hour) to code up. I'd entertain that much more eagerly than just dropping a uint64 in here and asserting it should be big enough forever.
>
>> We are already
>> doing so for changegroup and obsmarkers.
>>
>> The biggest bookmark we have seen was about 800 characters, that should
>> leave us some room for the future without blocking us.
>
> That you know about. I seem to remember violent arguments that 255 bytes should be more than enough. I agree a 64k bookmark sounds totally insane, but what if I wanted to put base64'd test result metadata in a bookmark?
>
>>
>>>
>>> I agree that practically speaking, nobody should have a 64k bookmark,
>>> but we can do better. We also know that \0 shouldn't appear in a
>>> bookmark name, so we could just make the format be
>>> null-terminated. I'd much rather we get in the practice of
>>> deliberately crafting formats that are maximally flexible and
>>> reusable.
>>
>> That would be significantly harder to parse since we would need to
>> search for the '\0' in the stream. We do not think it brings much value
>> either since any new data we sneak into the last field would have to be
>> understood by the receiver. If we need to negotiate for such
>> capabilities, this is equivalent to rolling out a new encoding.
>
> Then let's explore the varint option. Here's a rough sketch of a reader:
>
> import io
> src = io.BytesIO(b'\xff\x01')
> cur = ord(src.read(1))
> i = cur & 0x7f
> while cur & 0x80:
>     i = i << 7
>     cur = ord(src.read(1))
>     i += cur & 0x7f
> assert 0b11111110000001 == i
>
> Gives you 7 bits of value per byte transmitted, which means that we'll get most realistic bookmark lengths in two bytes of "here's how much string" (probably one byte for the vast majority of ascii cases!)
>
> I'd much rather we just build up the infrastructure for stuff like this _now_ and start using it, rather than continue to punt and shove massive uint64 fields in and then pray it'll be enough for all eternity. It does mean we can't use struct anymore, but that seems like a comparatively small price to pay for being able to trivially make this future proof.
>
> (As before, if other reviewers think I'm being too hard on this, please speak up.)

Just to chime in here with some Bitbucket data. All branch names and
git refs on Bitbucket fall under 1K. That's because we arbitrarily
set that limit long ago and there hasn't ever been an issue.

In fact, if you try to have a longer ref name, I suspect you'd hit the
MAX_PATH limit before the 1K limit. Regardless, parts of Bitbucket will
straight-up 500 on long names. To add to this, designing a layout for
bookmarks / branches / refs listing is fairly difficult, UX-wise, when
such a long limit is given.

I'd lean toward saying 64K is too large, actually, and that binary encoding
is just not what bookmark names should be. (So, yes, I think 64K is more
than enough.)

Patch

diff --git a/mercurial/bookmarks.py b/mercurial/bookmarks.py
--- a/mercurial/bookmarks.py
+++ b/mercurial/bookmarks.py
@@ -8,12 +8,14 @@ 
 from __future__ import absolute_import
 
 import errno
+import struct
 
 from .i18n import _
 from .node import (
     bin,
     hex,
     short,
+    wdirid,
 )
 from . import (
     encoding,
@@ -550,6 +552,60 @@  def unhexlifybookmarks(marks):
         binremotemarks[name] = bin(node)
     return binremotemarks
 
+_binaryentry = struct.Struct('>20sH')
+
+def binaryencode(bookmarks):
+    """encode a '(bookmark, node)' iterable into a binary stream
+
+    the binary format is:
+
+        <node><bookmark-length><bookmark-name>
+
+    :node: is a 20 bytes binary node,
+    :bookmark-length: an unsigned short,
+    :bookmark-name: the name of the bookmark (of length <bookmark-length>)
+
+    wdirid (all bits set) will be used as a special value for "missing"
+    """
+    binarydata = []
+    for book, node in bookmarks:
+        if not node: # None or ''
+            node = wdirid
+        binarydata.append(_binaryentry.pack(node, len(book)))
+        binarydata.append(book)
+    return ''.join(binarydata)
+
+def binarydecode(stream):
+    """decode a binary stream into an '(bookmark, node)' iterable
+
+    the binary format is:
+
+        <node><bookmark-length><bookmark-name>
+
+    :node: is a 20 bytes binary node,
+    :bookmark-length: an unsigned short,
+    :bookmark-name: the name of the bookmark (of length <bookmark-length>))
+
+    wdirid (all bits set) will be used as a special value for "missing"
+    """
+    entrysize = _binaryentry.size
+    books = []
+    while True:
+        entry = stream.read(entrysize)
+        if len(entry) < entrysize:
+            if entry:
+                raise error.Abort(_('bad bookmark stream'))
+            break
+        node, length = _binaryentry.unpack(entry)
+        bookmark = stream.read(length)
+        if len(bookmark) < length:
+            if entry:
+                raise error.Abort(_('bad bookmark stream'))
+        if node == wdirid:
+            node = None
+        books.append((bookmark, node))
+    return books
+
 def updatefromremote(ui, repo, remotemarks, path, trfunc, explicit=()):
     ui.debug("checking for updated bookmarks\n")
     localmarks = repo._bookmarks