Patchwork [5,of,7] templatefilters: make json filter be byte-transparent (BC) (issue4926)

login
register
mail settings
Submitter Yuya Nishihara
Date Feb. 23, 2016, 3:45 p.m.
Message ID <0a325e2f77abaee6f8c5.1456242330@mimosa>
Download mbox | patch
Permalink /patch/13317/
State Accepted
Delegated to: Augie Fackler
Headers show

Comments

Yuya Nishihara - Feb. 23, 2016, 3:45 p.m.
# HG changeset patch
# User Yuya Nishihara <yuya@tcha.org>
# Date 1451206797 -32400
#      Sun Dec 27 17:59:57 2015 +0900
# Node ID 0a325e2f77abaee6f8c58ae61d6a502ef94497b0
# Parent  8da9d7c9192d359eec2eb6b432612072cdc0b5e2
templatefilters: make json filter be byte-transparent (BC) (issue4926)

This is necessary to preserve filename encoding over JSON. Because the json
filter no longer converts its input from the local encoding, this patch
inserts "|utf8" wherever non-ASCII local-encoding text can be passed to
"|json".

See also the commit that introduced the "utf8" filter.

Patch

diff --git a/mercurial/hgweb/webcommands.py b/mercurial/hgweb/webcommands.py
--- a/mercurial/hgweb/webcommands.py
+++ b/mercurial/hgweb/webcommands.py
@@ -1196,7 +1196,8 @@  def graph(web, req, tmpl):
                 canvaswidth=(cols + 1) * bg_height,
                 truecanvasheight=rows * bg_height,
                 canvasheight=canvasheight, bg_height=bg_height,
-                jsdata=lambda **x: graphdata(True, str),
+                # {jsdata} will be passed to |json, so it must be in utf-8
+                jsdata=lambda **x: graphdata(True, encoding.fromlocal),
                 nodes=lambda **x: graphdata(False, str),
                 node=ctx.hex(), changenav=changenav)
 
diff --git a/mercurial/templatefilters.py b/mercurial/templatefilters.py
--- a/mercurial/templatefilters.py
+++ b/mercurial/templatefilters.py
@@ -197,15 +197,8 @@  def json(obj):
         return {None: 'null', False: 'false', True: 'true'}[obj]
     elif isinstance(obj, int) or isinstance(obj, float):
         return str(obj)
-    elif isinstance(obj, encoding.localstr):
-        u = encoding.fromlocal(obj).decode('utf-8')  # can round-trip
-        return '"%s"' % jsonescape(u)
     elif isinstance(obj, str):
-        # no encoding.fromlocal() because it may abort if obj can't be decoded
-        u = unicode(obj, encoding.encoding, 'replace')
-        return '"%s"' % jsonescape(u)
-    elif isinstance(obj, unicode):
-        return '"%s"' % jsonescape(obj)
+        return '"%s"' % encoding.jsonescape(obj, paranoid=True)
     elif util.safehasattr(obj, 'keys'):
         out = []
         for k, v in sorted(obj.iteritems()):
diff --git a/mercurial/templates/json/map b/mercurial/templates/json/map
--- a/mercurial/templates/json/map
+++ b/mercurial/templates/json/map
@@ -8,26 +8,26 @@  shortlog = changelist.tmpl
 changelistentry = '\{
   "node": {node|json},
   "date": {date|json},
-  "desc": {desc|json},
+  "desc": {desc|utf8|json},
   "bookmarks": [{join(bookmarks%changelistentryname, ", ")}],
   "tags": [{join(tags%changelistentryname, ", ")}],
-  "user": {author|json}
+  "user": {author|utf8|json}
   }'
-changelistentryname = '{name|json}'
+changelistentryname = '{name|utf8|json}'
 changeset = '\{
   "node": {node|json},
   "date": {date|json},
-  "desc": {desc|json},
+  "desc": {desc|utf8|json},
   "branch": {if(branch, branch%changesetbranch, "default"|json)},
   "bookmarks": [{join(changesetbookmark, ", ")}],
   "tags": [{join(changesettag, ", ")}],
-  "user": {author|json},
+  "user": {author|utf8|json},
   "parents": [{join(parent%changesetparent, ", ")}],
   "phase": {phase|json}
   }'
-changesetbranch = '{name|json}'
-changesetbookmark = '{bookmark|json}'
-changesettag = '{tag|json}'
+changesetbranch = '{name|utf8|json}'
+changesetbookmark = '{bookmark|utf8|json}'
+changesettag = '{tag|utf8|json}'
 changesetparent = '{node|json}'
 manifest = '\{
   "node": {node|json},
@@ -37,7 +37,7 @@  manifest = '\{
   "bookmarks": [{join(bookmarks%name, ", ")}],
   "tags": [{join(tags%name, ", ")}]
   }'
-name = '{name|json}'
+name = '{name|utf8|json}'
 direntry = '\{
   "abspath": {path|json},
   "basename": {basename|json},
@@ -55,7 +55,7 @@  tags = '\{
   "tags": [{join(entriesnotip%tagentry, ", ")}]
   }'
 tagentry = '\{
-  "tag": {tag|json},
+  "tag": {tag|utf8|json},
   "node": {node|json},
   "date": {date|json}
   }'
@@ -64,7 +64,7 @@  bookmarks = '\{
   "bookmarks": [{join(entries%bookmarkentry, ", ")}]
   }'
 bookmarkentry = '\{
-  "bookmark": {bookmark|json},
+  "bookmark": {bookmark|utf8|json},
   "node": {node|json},
   "date": {date|json}
   }'
@@ -72,7 +72,7 @@  branches = '\{
   "branches": [{join(entries%branchentry, ", ")}]
   }'
 branchentry = '\{
-  "branch": {branch|json},
+  "branch": {branch|utf8|json},
   "node": {node|json},
   "date": {date|json},
   "status": {status|json}
@@ -82,8 +82,8 @@  filediff = '\{
   "path": {file|json},
   "node": {node|json},
   "date": {date|json},
-  "desc": {desc|json},
-  "author": {author|json},
+  "desc": {desc|utf8|json},
+  "author": {author|utf8|json},
   "parents": [{join(parent%changesetparent, ", ")}],
   "children": [{join(child%changesetparent, ", ")}],
   "diff": [{join(diff%diffblock, ", ")}]
@@ -116,8 +116,8 @@  filecomparison = '\{
   "path": {file|json},
   "node": {node|json},
   "date": {date|json},
-  "desc": {desc|json},
-  "author": {author|json},
+  "desc": {desc|utf8|json},
+  "author": {author|utf8|json},
   "parents": [{join(parent%changesetparent, ", ")}],
   "children": [{join(child%changesetparent, ", ")}],
   "leftnode": {leftnode|json},
@@ -137,9 +137,9 @@  comparisonline = '\{
 fileannotate = '\{
   "abspath": {file|json},
   "node": {node|json},
-  "author": {author|json},
+  "author": {author|utf8|json},
   "date": {date|json},
-  "desc": {desc|json},
+  "desc": {desc|utf8|json},
   "parents": [{join(parent%changesetparent, ", ")}],
   "children": [{join(child%changesetparent, ", ")}],
   "permissions": {permissions|json},
@@ -147,8 +147,8 @@  fileannotate = '\{
   }'
 fileannotation = '\{
   "node": {node|json},
-  "author": {author|json},
-  "desc": {desc|json},
+  "author": {author|utf8|json},
+  "desc": {desc|utf8|json},
   "abspath": {file|json},
   "targetline": {targetline|json},
   "line": {line|json},
@@ -163,12 +163,12 @@  helptopics = '\{
   "othercommands": [{join(othercommands%helptopicentry, ", ")}]
   }'
 helptopicentry = '\{
-  "topic": {topic|json},
-  "summary": {summary|json}
+  "topic": {topic|utf8|json},
+  "summary": {summary|utf8|json}
   }'
 help = '\{
-  "topic": {topic|json},
-  "rawdoc": {doc|json}
+  "topic": {topic|utf8|json},
+  "rawdoc": {doc|utf8|json}
   }'
 filenodelink = ''
 filenolink = ''
diff --git a/tests/test-command-template.t b/tests/test-command-template.t
--- a/tests/test-command-template.t
+++ b/tests/test-command-template.t
@@ -3542,6 +3542,11 @@  Test broken string escapes:
   hg: parse error: invalid \x escape
   [255]
 
+json filter should escape HTML tags so that the output can be embedded in hgweb:
+
+  $ hg log -T "{'<foo@example.org>'|json}\n" -R a -l1
+  "\u003cfoo@example.org\u003e"
+
 Set up repository for non-ascii encoding tests:
 
   $ hg init nonascii
@@ -3558,11 +3563,12 @@  json filter should try round-trip conver
   $ HGENCODING=ascii hg log -T "{branch|json}\n" -r0
   "\u00e9"
 
-json filter should not abort if it can't decode bytes:
-(not sure the current behavior is right; we might want to use utf-8b encoding?)
+json filter takes input as utf-8b:
 
   $ HGENCODING=ascii hg log -T "{'`cat utf-8`'|json}\n" -l1
-  "\ufffd\ufffd"
+  "\u00e9"
+  $ HGENCODING=ascii hg log -T "{'`cat latin1`'|json}\n" -l1
+  "\udce9"
 
 utf8 filter: