new file mode 100644
@@ -0,0 +1,235 @@
+from __future__ import absolute_import
+
+import io
+import unittest
+
+from mercurial.thirdparty import (
+ cbor,
+)
+from mercurial.utils import (
+ cborutil,
+)
+
+class IndefiniteBytestringTests(unittest.TestCase):
+ def testitertoiter(self):
+ # This is the example from RFC 7049 Section 2.2.2.
+ source = [b'\xaa\xbb\xcc\xdd', b'\xee\xff\x99']
+
+ it = cborutil.itertoindefinitebytestring(source)
+
+ self.assertEqual(next(it), b'\x5f')
+ self.assertEqual(next(it), b'\x44')
+ self.assertEqual(next(it), b'\xaa\xbb\xcc\xdd')
+ self.assertEqual(next(it), b'\x43')
+ self.assertEqual(next(it), b'\xee\xff\x99')
+ self.assertEqual(next(it), b'\xff')
+
+ with self.assertRaises(StopIteration):
+ next(it)
+
+ dest = b''.join(cborutil.itertoindefinitebytestring(source))
+
+ self.assertEqual(cbor.loads(dest), b''.join(source))
+
+ def testreadtoiter(self):
+ source = io.BytesIO(b'\x5f\x44\xaa\xbb\xcc\xdd\x43\xee\xff\x99\xff')
+
+ it = cborutil.readindefinitebytestringtoiter(source)
+ self.assertEqual(next(it), b'\xaa\xbb\xcc\xdd')
+ self.assertEqual(next(it), b'\xee\xff\x99')
+
+ with self.assertRaises(StopIteration):
+ next(it)
+
+ def testtoiterlarge(self):
+ source = [b'a' * 16, b'b' * 128, b'c' * 1024, b'd' * 1048576]
+
+ dest = b''.join(cborutil.itertoindefinitebytestring(source))
+
+ self.assertEqual(cbor.loads(dest), b''.join(source))
+
+ def testbuffertoindefinite(self):
+ source = b'\x00\x01\x02\x03' + b'\xff' * 16384
+
+ it = cborutil.buffertoindefinitebytestring(source, chunksize=2)
+
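+ # Assumed reading of the encoding: 0x5f starts the indefinite
+ # bytestring; each 2 byte chunk is preceded by its own definite
+ # length prefix 0x42 (major type 2, length 2).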
+ self.assertEqual(next(it), b'\x5f')
+ self.assertEqual(next(it), b'\x42')
+ self.assertEqual(next(it), b'\x00\x01')
+ self.assertEqual(next(it), b'\x42')
+ self.assertEqual(next(it), b'\x02\x03')
+ self.assertEqual(next(it), b'\x42')
+ self.assertEqual(next(it), b'\xff\xff')
+
+ def testbuffertoindefiniteroundtrip(self):
+ source = b'x' * 1048576
+
+ chunks = list(cborutil.buffertoindefinitebytestring(source))
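+ # 16 chunks of 65536 bytes (the default chunksize), each preceded by a
+ # length prefix, plus the header and break bytes: 16 * 2 + 2 == 34.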
+ self.assertEqual(len(chunks), 34)
+
+ self.assertEqual(cbor.loads(b''.join(chunks)), source)
+
+class StreamArrayTests(unittest.TestCase):
+ def testempty(self):
+ b = io.BytesIO()
+ encoder = cbor.CBOREncoder(b)
+
+ with cborutil.streamarray(encoder):
+ pass
+
+ self.assertEqual(b.getvalue(), b'\x9f\xff')
+ self.assertEqual(cbor.loads(b.getvalue()), [])
+
+ def testone(self):
+ b = io.BytesIO()
+ encoder = cbor.CBOREncoder(b)
+
+ with cborutil.streamarray(encoder) as fn:
+ fn(b'foo')
+
+ self.assertEqual(cbor.loads(b.getvalue()), [b'foo'])
+
+ def testmultiple(self):
+ b = io.BytesIO()
+ encoder = cbor.CBOREncoder(b)
+
+ with cborutil.streamarray(encoder) as fn:
+ fn(0)
+ fn(True)
+ fn(b'foo')
+ fn(None)
+
+ self.assertEqual(cbor.loads(b.getvalue()), [0, True, b'foo', None])
+
+ def testnested(self):
+ b = io.BytesIO()
+ encoder = cbor.CBOREncoder(b)
+
+ with cborutil.streamarray(encoder):
+ with cborutil.streamarray(encoder) as fn:
+ fn(b'foo')
+ fn(b'bar')
+
+ self.assertEqual(cbor.loads(b.getvalue()), [[b'foo', b'bar']])
+
+ def testitemslist(self):
+ b = io.BytesIO()
+ encoder = cbor.CBOREncoder(b)
+
+ orig = [b'foo', b'bar', None, True, 42]
+
+ cborutil.streamarrayitems(encoder, orig)
+ self.assertEqual(cbor.loads(b.getvalue()), orig)
+
+ def testitemsgen(self):
+ def makeitems():
+ yield b'foo'
+ yield b'bar'
+ yield None
+ yield 42
+
+ b = io.BytesIO()
+ encoder = cbor.CBOREncoder(b)
+
+ cborutil.streamarrayitems(encoder, makeitems())
+ self.assertEqual(cbor.loads(b.getvalue()), [b'foo', b'bar', None, 42])
+
+class StreamMapTests(unittest.TestCase):
+ def testempty(self):
+ b = io.BytesIO()
+ encoder = cbor.CBOREncoder(b)
+
+ with cborutil.streammap(encoder):
+ pass
+
+ self.assertEqual(b.getvalue(), b'\xbf\xff')
+ self.assertEqual(cbor.loads(b.getvalue()), {})
+
+ def testone(self):
+ b = io.BytesIO()
+ encoder = cbor.CBOREncoder(b)
+
+ with cborutil.streammap(encoder) as fn:
+ fn(b'key1', b'value1')
+
+ self.assertEqual(cbor.loads(b.getvalue()), {b'key1': b'value1'})
+
+ def testmultiple(self):
+ b = io.BytesIO()
+ encoder = cbor.CBOREncoder(b)
+
+ with cborutil.streammap(encoder) as fn:
+ fn(0, 1)
+ fn(b'key1', b'value1')
+ fn(True, None)
+
+ self.assertEqual(cbor.loads(b.getvalue()), {
+ 0: 1,
+ b'key1': b'value1',
+ True: None,
+ })
+
+ def testcomplex(self):
+ b = io.BytesIO()
+ encoder = cbor.CBOREncoder(b)
+
+ with cborutil.streammap(encoder) as fn:
+ fn(b'key1', b'value1')
+ fn(b'map', {b'inner1key': b'inner1value'})
+ fn(b'array', [0, 1, 2])
+
+ self.assertEqual(cbor.loads(b.getvalue()), {
+ b'key1': b'value1',
+ b'map': {b'inner1key': b'inner1value'},
+ b'array': [0, 1, 2],
+ })
+
+ def testnested(self):
+ b = io.BytesIO()
+ encoder = cbor.CBOREncoder(b)
+
+ with cborutil.streammap(encoder):
+ encoder.encode(b'streamkey')
+ with cborutil.streammap(encoder) as fn2:
+ fn2(b'inner1key', b'inner1value')
+ fn2(0, 1)
+
+ self.assertEqual(cbor.loads(b.getvalue()), {
+ b'streamkey': {
+ b'inner1key': b'inner1value',
+ 0: 1,
+ },
+ })
+
+ def testitemsdict(self):
+ b = io.BytesIO()
+ encoder = cbor.CBOREncoder(b)
+
+ orig = [
+ (b'foo', b'bar'),
+ (42, 19),
+ (None, True),
+ ]
+
+ cborutil.streammapitems(encoder, orig)
+ self.assertEqual(cbor.loads(b.getvalue()), dict(orig))
+
+ def testitemsgen(self):
+ def makeitems():
+ yield b'foo', b'bar'
+ yield None, True
+ yield 42, 19
+
+ b = io.BytesIO()
+ encoder = cbor.CBOREncoder(b)
+
+ cborutil.streammapitems(encoder, makeitems())
+ self.assertEqual(cbor.loads(b.getvalue()), {
+ b'foo': b'bar',
+ None: True,
+ 42: 19,
+ })
+
+if __name__ == '__main__':
+ import silenttestrunner
+ silenttestrunner.main(__name__)
new file mode 100644
@@ -0,0 +1,206 @@
+# cborutil.py - CBOR extensions
+#
+# Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+import contextlib
+import struct
+
+from ..thirdparty.cbor.cbor2 import (
+ decoder as decodermod,
+ encoder as encodermod,
+)
+
+# Very short version of RFC 7049...
+#
+# Each item begins with a byte. The 3 high bits of that byte denote the
+# "major type." The lower 5 bits denote the "subtype." Each major type
+# has its own encoding mechanism.
+#
+# Most types have lengths. However, bytestring, string, array, and map
+# can be indefinite length. These are denoted by a subtype with value 31.
+# Sub-components of those types then come afterwards and are terminated
+# by a "break" byte.
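+#
+# For example, the definite length bytestring h'aabbccdd' encodes as
+# 0x44 0xaa 0xbb 0xcc 0xdd (major type 2, length 4). Wrapped in an
+# indefinite length bytestring as a single chunk, it becomes
+# 0x5f 0x44 0xaa 0xbb 0xcc 0xdd 0xff (begin, chunk, break).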
+
+MAJOR_TYPE_UINT = 0
+MAJOR_TYPE_NEGINT = 1
+MAJOR_TYPE_BYTESTRING = 2
+MAJOR_TYPE_STRING = 3
+MAJOR_TYPE_ARRAY = 4
+MAJOR_TYPE_MAP = 5
+MAJOR_TYPE_SEMANTIC = 6
+MAJOR_TYPE_SPECIAL = 7
+
+SUBTYPE_MASK = 0b00011111
+
+SUBTYPE_INDEFINITE = 31
+
+# Indefinite types begin with their major type ORed with information value 31.
+BEGIN_INDEFINITE_BYTESTRING = struct.pack(
+ r'>B', MAJOR_TYPE_BYTESTRING << 5 | SUBTYPE_INDEFINITE)
+BEGIN_INDEFINITE_ARRAY = struct.pack(
+ r'>B', MAJOR_TYPE_ARRAY << 5 | SUBTYPE_INDEFINITE)
+BEGIN_INDEFINITE_MAP = struct.pack(
+ r'>B', MAJOR_TYPE_MAP << 5 | SUBTYPE_INDEFINITE)
+
+# The break ends an indefinite length item.
+BREAK = b'\xff'
+BREAK_INT = 255
+
+def beginindefinitearray(encoder):
+ encoder.write(BEGIN_INDEFINITE_ARRAY)
+
+def beginindefinitemap(encoder):
+ encoder.write(BEGIN_INDEFINITE_MAP)
+
+@contextlib.contextmanager
+def streamarray(encoder):
+ """Write an array in a streaming manner.
+
+ Used as a context manager, the context manager resolves to a function
+ that should be called for each item to write to the array.
+
+ When the context manager exits, the indefinite length array is ended.
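+
+ A minimal illustration (this decodes to ``[b'foo', b'bar']``)::
+
+ with streamarray(encoder) as fn:
+ fn(b'foo')
+ fn(b'bar')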
+ """
+ def writeitem(value):
+ encoder.encode(value)
+
+ beginindefinitearray(encoder)
+ yield writeitem
+ encoder.write(BREAK)
+
+def streamarrayitems(encoder, items):
+ """Write out an iterable of items to a streaming array."""
+ with streamarray(encoder) as fn:
+ for value in items:
+ fn(value)
+
+@contextlib.contextmanager
+def streammap(encoder):
+ """Write a map in a streaming manner.
+
+ Used as a context manager, the context manager resolves to a function
+ that should be called with a key and value of each map item to write.
+
+ When the context manager exits, the indefinite length map is ended.
+
+ It is possible to nest streaming data structures. If the caller writes
+ out 2 values, the first value will be interpreted as a key and the second
+ a value. So a caller could do something like::
+
+ with streammap(encoder):
+ encoder.encode(b'mykey')
+ with streammap(encoder) as fn:
+ fn(b'innerkey', b'value')
+
+ This would decode to ``{b'mykey': {b'innerkey': b'value'}}``.
+ """
+ def writeitem(key, value):
+ encoder.encode(key)
+ encoder.encode(value)
+
+ beginindefinitemap(encoder)
+ yield writeitem
+ encoder.write(BREAK)
+
+def streammapitems(encoder, items):
+ """Write out an iterable of (key, value) items to a streaming map."""
+ with streammap(encoder) as fn:
+ for key, value in items:
+ fn(key, value)
+
+def itertoindefinitebytestring(it):
+ """Convert an iterator of chunks to an indefinite bytestring.
+
+ Given an input that is iterable and each element in the iterator is
+ representable as bytes, emit an indefinite length bytestring.
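+
+ For example, the RFC 7049 Section 2.2.2 chunks (as exercised in the
+ tests) encode as follows::
+
+ it = itertoindefinitebytestring([b'\xaa\xbb\xcc\xdd', b'\xee\xff\x99'])
+ # Yields b'\x5f', b'\x44', b'\xaa\xbb\xcc\xdd', b'\x43',
+ # b'\xee\xff\x99' and finally the break byte b'\xff'.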
+ """
+ # Alias for performance.
+ encodelen = encodermod.encode_length
+ bytestringmajor = MAJOR_TYPE_BYTESTRING << 5
+
+ yield BEGIN_INDEFINITE_BYTESTRING
+
+ for chunk in it:
+ yield encodelen(bytestringmajor, len(chunk))
+ yield chunk
+
+ yield BREAK
+
+def buffertoindefinitebytestring(source, chunksize=65536):
+ """Given a large source buffer, emit as an indefinite length bytestring.
+
+ This is a generator of chunks constituting the encoded CBOR data.
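+
+ For example, with ``chunksize=2`` (as in the tests)::
+
+ list(buffertoindefinitebytestring(b'\x00\x01\x02\x03', chunksize=2))
+
+ produces ``[b'\x5f', b'\x42', b'\x00\x01', b'\x42', b'\x02\x03', b'\xff']``.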
+ """
+ # Alias for performance.
+ encodelen = encodermod.encode_length
+ bytestringmajor = MAJOR_TYPE_BYTESTRING << 5
+
+ yield BEGIN_INDEFINITE_BYTESTRING
+
+ i = 0
+ l = len(source)
+
+ while True:
+ chunk = source[i:i + chunksize]
+ i += len(chunk)
+
+ yield encodelen(bytestringmajor, len(chunk))
+ yield chunk
+
+ if i >= l:
+ break
+
+ yield BREAK
+
+def readindefinitebytestringtoiter(fh, expectheader=True):
+ """Read an indefinite bytestring to a generator.
+
+ Receives an object with a ``read(n)`` method that reads ``n`` bytes.
+
+ If ``expectheader`` is True, it is expected that the first byte read
+ will represent an indefinite length bytestring. Otherwise, we
+ expect the first byte to be part of the first bytestring chunk.
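+
+ For example, decoding the stream from the tests with an ``io.BytesIO``
+ source::
+
+ fh = io.BytesIO(b'\x5f\x44\xaa\xbb\xcc\xdd\x43\xee\xff\x99\xff')
+ list(readindefinitebytestringtoiter(fh))
+
+ yields ``[b'\xaa\xbb\xcc\xdd', b'\xee\xff\x99']``.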
+ """
+ read = fh.read
+ decodeuint = decodermod.decode_uint
+ byteasinteger = decodermod.byte_as_integer
+
+ if expectheader:
+ initial = decodermod.byte_as_integer(read(1))
+
+ majortype = initial >> 5
+ subtype = initial & SUBTYPE_MASK
+
+ if majortype != MAJOR_TYPE_BYTESTRING:
+ raise decodermod.CBORDecodeError(
+ 'expected major type %d; got %d' % (MAJOR_TYPE_BYTESTRING,
+ majortype))
+
+ if subtype != SUBTYPE_INDEFINITE:
+ raise decodermod.CBORDecodeError(
+ 'expected indefinite subtype; got %d' % subtype)
+
+ # The indefinite bytestring is composed of chunks of normal bytestrings.
+ # Read chunks until we hit a BREAK byte.
+
+ while True:
+ # We need to sniff for the BREAK byte.
+ initial = byteasinteger(read(1))
+
+ if initial == BREAK_INT:
+ break
+
+ length = decodeuint(fh, initial & SUBTYPE_MASK)
+ chunk = read(length)
+
+ if len(chunk) != length:
+ raise decodermod.CBORDecodeError(
+ 'failed to read bytestring chunk: got %d bytes; expected %d' % (
+ len(chunk), length))
+
+ yield chunk
@@ -36,6 +36,8 @@
'mercurial.pure.parsers',
# third-party imports should be directly imported
'mercurial.thirdparty',
+ 'mercurial.thirdparty.cbor',
+ 'mercurial.thirdparty.cbor.cbor2',
'mercurial.thirdparty.zope',
'mercurial.thirdparty.zope.interface',
)