Patchwork [02,of,15] bookmark: add methods to binary encode and decode bookmark values

login
register
mail settings
Submitter Boris Feld
Date Oct. 18, 2017, 4:09 p.m.
Message ID <f27feea4fe4c313aa87c.1508342994@FB>
Download mbox | patch
Permalink /patch/25186/
State Accepted
Headers show

Comments

Boris Feld - Oct. 18, 2017, 4:09 p.m.
# HG changeset patch
# User Boris Feld <boris.feld@octobus.net>
# Date 1508072395 -7200
#      Sun Oct 15 14:59:55 2017 +0200
# Node ID f27feea4fe4c313aa87c85aabd1ed8c3c4b31296
# Parent  a450dc69ef95073af4d4ece34e251ed0bb5b808f
# EXP-Topic b2.bookmarks
# Available At https://bitbucket.org/octobus/mercurial-devel/
#              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r f27feea4fe4c
bookmark: add methods to binary encode and decode bookmark values

Coming new bundle2 parts related to bookmark will use a binary encoding. It
encodes a series of '(bookmark, node)' pairs. Bookmark name has a high enough
size limit to not be affected by issue5165. (64K length, we are well covered)
Gregory Szorc - Oct. 18, 2017, 4:53 p.m.
On Wed, Oct 18, 2017 at 6:09 PM, Boris Feld <boris.feld@octobus.net> wrote:

> # HG changeset patch
> # User Boris Feld <boris.feld@octobus.net>
> # Date 1508072395 -7200
> #      Sun Oct 15 14:59:55 2017 +0200
> # Node ID f27feea4fe4c313aa87c85aabd1ed8c3c4b31296
> # Parent  a450dc69ef95073af4d4ece34e251ed0bb5b808f
> # EXP-Topic b2.bookmarks
> # Available At https://bitbucket.org/octobus/mercurial-devel/
> #              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r
> f27feea4fe4c
> bookmark: add methods to binary encode and decode bookmark values
>
> Coming new bundle2 parts related to bookmark will use a binary encoding. It
> encodes a series of '(bookmark, node)' pairs. Bookmark name has a high
> enough
> size limit to not be affected by issue5165. (64K length, we are well
> covered)
>
> diff --git a/mercurial/bookmarks.py b/mercurial/bookmarks.py
> --- a/mercurial/bookmarks.py
> +++ b/mercurial/bookmarks.py
> @@ -8,12 +8,14 @@
>  from __future__ import absolute_import
>
>  import errno
> +import struct
>
>  from .i18n import _
>  from .node import (
>      bin,
>      hex,
>      short,
> +    wdirid,
>  )
>  from . import (
>      encoding,
> @@ -550,6 +552,58 @@
>          binremotemarks[name] = bin(node)
>      return binremotemarks
>
> +_binaryentry = struct.Struct('>20sH')
> +
> +def binaryencode(bookmarks):
> +    """encode a '(bookmark, node)' iterable into a binary stream
> +
> +    the binary format is:
> +
> +        <node><bookmark-length><bookmark-name>
> +
> +    :node: is a 20 bytes binary node,
> +    :bookmark-length: an unsigned short,
> +    :bookmark-name: the name of the bookmark (of length <bookmark-length>)
> +
> +    wdirid (all bits set) will be used as a special value for "missing"
> +    """
> +    binarydata = []
> +    for book, node in bookmarks:
> +        if not node: # None or ''
> +            node = wdirid
> +        binarydata.append(_binaryentry.pack(node, len(book)))
> +        binarydata.append(book)
> +    return ''.join(binarydata)
> +
> +def binarydecode(stream):
> +    """decode a binary stream into an '(bookmark, node)' iterable
> +
> +    the binary format is:
> +
> +        <node><bookmark-length><bookmark-name>
> +
> +    :node: is a 20 bytes binary node,
> +    :bookmark-length: an unsigned short,
> +    :bookmark-name: the name of the bookmark (of length
> <bookmark-length>))
> +
> +    wdirid (all bits set) will be used as a special value for "missing"
> +    """
> +    entrysize = _binaryentry.size
> +    while True:
> +        entry = stream.read(entrysize)
> +        if len(entry) < entrysize:
> +            if entry:
> +                raise error.Abort(_('bad bookmark stream'))
> +            break
> +        node, length = _binaryentry.unpack(entry)
> +        bookmark = stream.read(length)
> +        if len(bookmark) < length:
> +            if entry:
> +                raise error.Abort(_('bad bookmark stream'))
> +        if node == wdirid:
> +            node = None
> +        yield (bookmark, node)
>
>
Nit: no need for parens here.

More importantly, combining generators and code that does I/O in the same
function is a bit dangerous because another consumer of the passed in
stream could seek it while the generator is still running. For parsing
small payloads, I have a preference for doing all the I/O up front rather
than mixing a generator with I/O.

Also, I would expect symmetry between the encode and decode functions.
Currently, one is a generator and the other isn't.

What do you think?


>  def updatefromremote(ui, repo, remotemarks, path, trfunc, explicit=()):
>      ui.debug("checking for updated bookmarks\n")
>      localmarks = repo._bookmarks
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
>

Patch

diff --git a/mercurial/bookmarks.py b/mercurial/bookmarks.py
--- a/mercurial/bookmarks.py
+++ b/mercurial/bookmarks.py
@@ -8,12 +8,14 @@ 
 from __future__ import absolute_import
 
 import errno
+import struct
 
 from .i18n import _
 from .node import (
     bin,
     hex,
     short,
+    wdirid,
 )
 from . import (
     encoding,
@@ -550,6 +552,58 @@ 
         binremotemarks[name] = bin(node)
     return binremotemarks
 
+_binaryentry = struct.Struct('>20sH')
+
+def binaryencode(bookmarks):
+    """encode a '(bookmark, node)' iterable into a binary stream
+
+    the binary format is:
+
+        <node><bookmark-length><bookmark-name>
+
+    :node: is a 20 bytes binary node,
+    :bookmark-length: an unsigned short,
+    :bookmark-name: the name of the bookmark (of length <bookmark-length>)
+
+    wdirid (all bits set) will be used as a special value for "missing"
+    """
+    binarydata = []
+    for book, node in bookmarks:
+        if not node: # None or ''
+            node = wdirid
+        binarydata.append(_binaryentry.pack(node, len(book)))
+        binarydata.append(book)
+    return ''.join(binarydata)
+
+def binarydecode(stream):
+    """decode a binary stream into an '(bookmark, node)' iterable
+
+    the binary format is:
+
+        <node><bookmark-length><bookmark-name>
+
+    :node: is a 20 bytes binary node,
+    :bookmark-length: an unsigned short,
+    :bookmark-name: the name of the bookmark (of length <bookmark-length>))
+
+    wdirid (all bits set) will be used as a special value for "missing"
+    """
+    entrysize = _binaryentry.size
+    while True:
+        entry = stream.read(entrysize)
+        if len(entry) < entrysize:
+            if entry:
+                raise error.Abort(_('bad bookmark stream'))
+            break
+        node, length = _binaryentry.unpack(entry)
+        bookmark = stream.read(length)
+        if len(bookmark) < length:
+            if entry:
+                raise error.Abort(_('bad bookmark stream'))
+        if node == wdirid:
+            node = None
+        yield (bookmark, node)
+
 def updatefromremote(ui, repo, remotemarks, path, trfunc, explicit=()):
     ui.debug("checking for updated bookmarks\n")
     localmarks = repo._bookmarks