Patchwork [1 of 2] template: add CBOR output format

login
register
mail settings
Submitter Yuya Nishihara
Date March 10, 2019, 5:56 a.m.
Message ID <343027851edc0337e405.1552197408@mimosa>
Download mbox | patch
Permalink /patch/39198/
State Accepted
Headers show

Comments

Yuya Nishihara - March 10, 2019, 5:56 a.m.
# HG changeset patch
# User Yuya Nishihara <yuya@tcha.org>
# Date 1552190244 -32400
#      Sun Mar 10 12:57:24 2019 +0900
# Node ID 343027851edc0337e4058feb6f51d67dc584bc0b
# Parent  9d4ae5044b4c96bdfb2bbb33fa696908b664a1d7
template: add CBOR output format

The whole output is wrapped as an array just like the other serialization
formats. It's an indefinite-length array since the size is unknown while
encoding. Maybe we can add 'cbor-stream' (and 'pickle-stream') as needed.
Josef 'Jeff' Sipek - March 10, 2019, 8:24 p.m.
On Sun, Mar 10, 2019 at 14:56:48 +0900, Yuya Nishihara wrote:
> # HG changeset patch
> # User Yuya Nishihara <yuya@tcha.org>
> # Date 1552190244 -32400
> #      Sun Mar 10 12:57:24 2019 +0900
> # Node ID 343027851edc0337e4058feb6f51d67dc584bc0b
> # Parent  9d4ae5044b4c96bdfb2bbb33fa696908b664a1d7
> template: add CBOR output format
> 
> The whole output is wrapped as an array just like the other serialization
> formats. It's an indefinite-length array since the size is unknown while
> encoding. Maybe we can add 'cbor-stream' (and 'pickle-stream') as needed.

FWIW, cbor sequences are what I think you have in mind for cbor-stream:

https://mailarchive.ietf.org/arch/msg/cbor/3MMQdOMd6ESrMQPFPzSORlsLfiY

Jeff.

> 
> diff --git a/mercurial/formatter.py b/mercurial/formatter.py
> --- a/mercurial/formatter.py
> +++ b/mercurial/formatter.py
> @@ -130,6 +130,7 @@ from . import (
>      util,
>  )
>  from .utils import (
> +    cborutil,
>      dateutil,
>      stringutil,
>  )
> @@ -341,6 +342,18 @@ class pickleformatter(baseformatter):
>          baseformatter.end(self)
>          self._out.write(pickle.dumps(self._data))
>  
> +class cborformatter(baseformatter):
> +    '''serialize items as an indefinite-length CBOR array'''
> +    def __init__(self, ui, out, topic, opts):
> +        baseformatter.__init__(self, ui, topic, opts, _nullconverter)
> +        self._out = out
> +        self._out.write(cborutil.BEGIN_INDEFINITE_ARRAY)
> +    def _showitem(self):
> +        self._out.write(b''.join(cborutil.streamencode(self._item)))
> +    def end(self):
> +        baseformatter.end(self)
> +        self._out.write(cborutil.BREAK)
> +
>  class jsonformatter(baseformatter):
>      def __init__(self, ui, out, topic, opts):
>          baseformatter.__init__(self, ui, topic, opts, _nullconverter)
> @@ -617,7 +630,9 @@ class templateresources(templater.resour
>  
>  def formatter(ui, out, topic, opts):
>      template = opts.get("template", "")
> -    if template == "json":
> +    if template == "cbor":
> +        return cborformatter(ui, out, topic, opts)
> +    elif template == "json":
>          return jsonformatter(ui, out, topic, opts)
>      elif template == "pickle":
>          return pickleformatter(ui, out, topic, opts)
> diff --git a/mercurial/help/scripting.txt b/mercurial/help/scripting.txt
> --- a/mercurial/help/scripting.txt
> +++ b/mercurial/help/scripting.txt
> @@ -142,9 +142,11 @@ output containing authors, dates, descri
>     using templates to make your life easier.
>  
>  The ``-T/--template`` argument allows specifying pre-defined styles.
> -Mercurial ships with the machine-readable styles ``json`` and ``xml``,
> -which provide JSON and XML output, respectively. These are useful for
> -producing output that is machine readable as-is.
> +Mercurial ships with the machine-readable styles ``cbor``, ``json``,
> +and ``xml``, which provide CBOR, JSON, and XML output, respectively.
> +These are useful for producing output that is machine readable as-is.
> +
> +(Mercurial 5.0 is required for CBOR style.)
>  
>  .. important::
>  
> diff --git a/mercurial/logcmdutil.py b/mercurial/logcmdutil.py
> --- a/mercurial/logcmdutil.py
> +++ b/mercurial/logcmdutil.py
> @@ -542,7 +542,7 @@ def changesetdisplayer(ui, repo, opts, d
>      regular display via changesetprinter() is done.
>      """
>      postargs = (differ, opts, buffered)
> -    if opts.get('template') == 'json':
> +    if opts.get('template') in {'cbor', 'json'}:
>          fm = ui.formatter('log', opts)
>          return changesetformatter(ui, repo, fm, *postargs)
>  
> diff --git a/tests/test-template-map.t b/tests/test-template-map.t
> --- a/tests/test-template-map.t
> +++ b/tests/test-template-map.t
> @@ -669,6 +669,70 @@ Test xml styles:
>    </log>
>  
>  
> +test CBOR style:
> +
> +  $ cat <<'EOF' > "$TESTTMP/decodecborarray.py"
> +  > from __future__ import absolute_import
> +  > from mercurial import pycompat
> +  > from mercurial.utils import (
> +  >     cborutil,
> +  >     stringutil,
> +  > )
> +  > data = pycompat.stdin.read()
> +  > # our CBOR decoder doesn't support parsing indefinite-length arrays,
> +  > # but the log output is indefinite stream by nature.
> +  > assert data[:1] == cborutil.BEGIN_INDEFINITE_ARRAY
> +  > assert data[-1:] == cborutil.BREAK
> +  > items = cborutil.decodeall(data[1:-1])
> +  > pycompat.stdout.write(stringutil.pprint(items, indent=1) + b'\n')
> +  > EOF
> +
> +  $ hg log -k nosuch -Tcbor | "$PYTHON" "$TESTTMP/decodecborarray.py"
> +  []
> +
> +  $ hg log -qr0:1 -Tcbor | "$PYTHON" "$TESTTMP/decodecborarray.py"
> +  [
> +   {
> +    'node': '1e4e1b8f71e05681d422154f5421e385fec3454f',
> +    'rev': 0
> +   },
> +   {
> +    'node': 'b608e9d1a3f0273ccf70fb85fd6866b3482bf965',
> +    'rev': 1
> +   }
> +  ]
> +
> +  $ hg log -vpr . -Tcbor --stat | "$PYTHON" "$TESTTMP/decodecborarray.py"
> +  [
> +   {
> +    'bookmarks': [],
> +    'branch': 'default',
> +    'date': [
> +     1577872860,
> +     0
> +    ],
> +    'desc': 'third',
> +    'diff': 'diff -r 29114dbae42b -r 95c24699272e fourth\n--- /dev/null\tThu Jan 01 00:00:00 1970 +0000\n+++ b/fourth\tWed Jan 01 10:01:00 2020 +0000\n@@ -0,0 +1,1 @@\n+second\ndiff -r 29114dbae42b -r 95c24699272e second\n--- a/second\tMon Jan 12 13:46:40 1970 +0000\n+++ /dev/null\tThu Jan 01 00:00:00 1970 +0000\n@@ -1,1 +0,0 @@\n-second\ndiff -r 29114dbae42b -r 95c24699272e third\n--- /dev/null\tThu Jan 01 00:00:00 1970 +0000\n+++ b/third\tWed Jan 01 10:01:00 2020 +0000\n@@ -0,0 +1,1 @@\n+third\n',
> +    'diffstat': ' fourth |  1 +\n second |  1 -\n third  |  1 +\n 3 files changed, 2 insertions(+), 1 deletions(-)\n',
> +    'files': [
> +     'fourth',
> +     'second',
> +     'third'
> +    ],
> +    'node': '95c24699272ef57d062b8bccc32c878bf841784a',
> +    'parents': [
> +     '29114dbae42b9f078cf2714dbe3a86bba8ec7453'
> +    ],
> +    'phase': 'draft',
> +    'rev': 8,
> +    'tags': [
> +     'tip'
> +    ],
> +    'user': 'test'
> +   }
> +  ]
> +
> +
>  Test JSON style:
>  
>    $ hg log -k nosuch -Tjson
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Pulkit Goyal - March 22, 2019, 5:03 p.m.
On Sun, Mar 10, 2019 at 8:58 AM Yuya Nishihara <yuya@tcha.org> wrote:

> # HG changeset patch
> # User Yuya Nishihara <yuya@tcha.org>
> # Date 1552190244 -32400
> #      Sun Mar 10 12:57:24 2019 +0900
> # Node ID 343027851edc0337e4058feb6f51d67dc584bc0b
> # Parent  9d4ae5044b4c96bdfb2bbb33fa696908b664a1d7
> template: add CBOR output format
>
> The whole output is wrapped as an array just like the other serialization
> formats. It's an indefinite-length array since the size is unknown while
> encoding. Maybe we can add 'cbor-stream' (and 'pickle-stream') as needed.
>

Thanks a lot for coding this up. Queued the series!
via Mercurial-devel - March 23, 2019, 3:07 a.m.
On Sun, 10 Mar 2019 00:56:48 -0500, Yuya Nishihara <yuya@tcha.org> wrote:

> # HG changeset patch
> # User Yuya Nishihara <yuya@tcha.org>
> # Date 1552190244 -32400
> #      Sun Mar 10 12:57:24 2019 +0900
> # Node ID 343027851edc0337e4058feb6f51d67dc584bc0b
> # Parent  9d4ae5044b4c96bdfb2bbb33fa696908b664a1d7
> template: add CBOR output format
>
> The whole output is wrapped as an array just like the other serialization
> formats. It's an indefinite-length array since the size is unknown while
> encoding. Maybe we can add 'cbor-stream' (and 'pickle-stream') as needed.

> +  $ hg log -vpr . -Tcbor --stat | "$PYTHON" "$TESTTMP/decodecborarray.py"
> +  [
> +   {
> +    'bookmarks': [],
> +    'branch': 'default',
> +    'date': [
> +     1577872860,
> +     0
> +    ],
> +    'desc': 'third',
> +    'diff': 'diff -r 29114dbae42b -r 95c24699272e fourth\n--- /dev/null\tThu Jan 01 00:00:00 1970 +0000\n+++ b/fourth\tWed Jan 01 10:01:00 2020 +0000\n@@ -0,0 +1,1 @@\n+second\ndiff -r 29114dbae42b -r 95c24699272e second\n--- a/second\tMon Jan 12 13:46:40 1970 +0000\n+++ /dev/null\tThu Jan 01 00:00:00 1970 +0000\n@@ -1,1 +0,0 @@\n-second\ndiff -r 29114dbae42b -r 95c24699272e third\n--- /dev/null\tThu Jan 01 00:00:00 1970 +0000\n+++ b/third\tWed Jan 01 10:01:00 2020 +0000\n@@ -0,0 +1,1 @@\n+third\n',
> +    'diffstat': ' fourth |  1 +\n second |  1 -\n third  |  1 +\n 3 files changed, 2 insertions(+), 1 deletions(-)\n',
> +    'files': [
> +     'fourth',
> +     'second',
> +     'third'
> +    ],
> +    'node': '95c24699272ef57d062b8bccc32c878bf841784a',
> +    'parents': [
> +     '29114dbae42b9f078cf2714dbe3a86bba8ec7453'
> +    ],
> +    'phase': 'draft',
> +    'rev': 8,
> +    'tags': [
> +     'tip'
> +    ],
> +    'user': 'test'
> +   }
> +  ]

Not sure why, but this is failing with an assertion on Windows[1].   
Printing out data just prior to the exception shows:

     \x9f\xadIbookmarks\x80FbranchGdefaultDdate\x82

cborformatter.end() is being called, but it's like the \xff byte isn't  
being output.  Adding a flush there didn't help.

[1] https://buildbot.mercurial-scm.org/builders/Win7%20x86_64%20hg%20tests/builds/1334/steps/run-tests.py%20%28python%202.7.13%29/logs/stdio

Patch

diff --git a/mercurial/formatter.py b/mercurial/formatter.py
--- a/mercurial/formatter.py
+++ b/mercurial/formatter.py
@@ -130,6 +130,7 @@  from . import (
     util,
 )
 from .utils import (
+    cborutil,
     dateutil,
     stringutil,
 )
@@ -341,6 +342,18 @@  class pickleformatter(baseformatter):
         baseformatter.end(self)
         self._out.write(pickle.dumps(self._data))
 
+class cborformatter(baseformatter):
+    '''serialize items as an indefinite-length CBOR array'''
+    def __init__(self, ui, out, topic, opts):
+        baseformatter.__init__(self, ui, topic, opts, _nullconverter)
+        self._out = out
+        self._out.write(cborutil.BEGIN_INDEFINITE_ARRAY)
+    def _showitem(self):
+        self._out.write(b''.join(cborutil.streamencode(self._item)))
+    def end(self):
+        baseformatter.end(self)
+        self._out.write(cborutil.BREAK)
+
 class jsonformatter(baseformatter):
     def __init__(self, ui, out, topic, opts):
         baseformatter.__init__(self, ui, topic, opts, _nullconverter)
@@ -617,7 +630,9 @@  class templateresources(templater.resour
 
 def formatter(ui, out, topic, opts):
     template = opts.get("template", "")
-    if template == "json":
+    if template == "cbor":
+        return cborformatter(ui, out, topic, opts)
+    elif template == "json":
         return jsonformatter(ui, out, topic, opts)
     elif template == "pickle":
         return pickleformatter(ui, out, topic, opts)
diff --git a/mercurial/help/scripting.txt b/mercurial/help/scripting.txt
--- a/mercurial/help/scripting.txt
+++ b/mercurial/help/scripting.txt
@@ -142,9 +142,11 @@  output containing authors, dates, descri
    using templates to make your life easier.
 
 The ``-T/--template`` argument allows specifying pre-defined styles.
-Mercurial ships with the machine-readable styles ``json`` and ``xml``,
-which provide JSON and XML output, respectively. These are useful for
-producing output that is machine readable as-is.
+Mercurial ships with the machine-readable styles ``cbor``, ``json``,
+and ``xml``, which provide CBOR, JSON, and XML output, respectively.
+These are useful for producing output that is machine readable as-is.
+
+(Mercurial 5.0 is required for CBOR style.)
 
 .. important::
 
diff --git a/mercurial/logcmdutil.py b/mercurial/logcmdutil.py
--- a/mercurial/logcmdutil.py
+++ b/mercurial/logcmdutil.py
@@ -542,7 +542,7 @@  def changesetdisplayer(ui, repo, opts, d
     regular display via changesetprinter() is done.
     """
     postargs = (differ, opts, buffered)
-    if opts.get('template') == 'json':
+    if opts.get('template') in {'cbor', 'json'}:
         fm = ui.formatter('log', opts)
         return changesetformatter(ui, repo, fm, *postargs)
 
diff --git a/tests/test-template-map.t b/tests/test-template-map.t
--- a/tests/test-template-map.t
+++ b/tests/test-template-map.t
@@ -669,6 +669,70 @@  Test xml styles:
   </log>
 
 
+test CBOR style:
+
+  $ cat <<'EOF' > "$TESTTMP/decodecborarray.py"
+  > from __future__ import absolute_import
+  > from mercurial import pycompat
+  > from mercurial.utils import (
+  >     cborutil,
+  >     stringutil,
+  > )
+  > data = pycompat.stdin.read()
+  > # our CBOR decoder doesn't support parsing indefinite-length arrays,
+  > # but the log output is indefinite stream by nature.
+  > assert data[:1] == cborutil.BEGIN_INDEFINITE_ARRAY
+  > assert data[-1:] == cborutil.BREAK
+  > items = cborutil.decodeall(data[1:-1])
+  > pycompat.stdout.write(stringutil.pprint(items, indent=1) + b'\n')
+  > EOF
+
+  $ hg log -k nosuch -Tcbor | "$PYTHON" "$TESTTMP/decodecborarray.py"
+  []
+
+  $ hg log -qr0:1 -Tcbor | "$PYTHON" "$TESTTMP/decodecborarray.py"
+  [
+   {
+    'node': '1e4e1b8f71e05681d422154f5421e385fec3454f',
+    'rev': 0
+   },
+   {
+    'node': 'b608e9d1a3f0273ccf70fb85fd6866b3482bf965',
+    'rev': 1
+   }
+  ]
+
+  $ hg log -vpr . -Tcbor --stat | "$PYTHON" "$TESTTMP/decodecborarray.py"
+  [
+   {
+    'bookmarks': [],
+    'branch': 'default',
+    'date': [
+     1577872860,
+     0
+    ],
+    'desc': 'third',
+    'diff': 'diff -r 29114dbae42b -r 95c24699272e fourth\n--- /dev/null\tThu Jan 01 00:00:00 1970 +0000\n+++ b/fourth\tWed Jan 01 10:01:00 2020 +0000\n@@ -0,0 +1,1 @@\n+second\ndiff -r 29114dbae42b -r 95c24699272e second\n--- a/second\tMon Jan 12 13:46:40 1970 +0000\n+++ /dev/null\tThu Jan 01 00:00:00 1970 +0000\n@@ -1,1 +0,0 @@\n-second\ndiff -r 29114dbae42b -r 95c24699272e third\n--- /dev/null\tThu Jan 01 00:00:00 1970 +0000\n+++ b/third\tWed Jan 01 10:01:00 2020 +0000\n@@ -0,0 +1,1 @@\n+third\n',
+    'diffstat': ' fourth |  1 +\n second |  1 -\n third  |  1 +\n 3 files changed, 2 insertions(+), 1 deletions(-)\n',
+    'files': [
+     'fourth',
+     'second',
+     'third'
+    ],
+    'node': '95c24699272ef57d062b8bccc32c878bf841784a',
+    'parents': [
+     '29114dbae42b9f078cf2714dbe3a86bba8ec7453'
+    ],
+    'phase': 'draft',
+    'rev': 8,
+    'tags': [
+     'tip'
+    ],
+    'user': 'test'
+   }
+  ]
+
+
 Test JSON style:
 
   $ hg log -k nosuch -Tjson