Patchwork py3: have registrar process docstrings in bytes

login
register
mail settings
Submitter Yuya Nishihara
Date April 4, 2017, 3:56 p.m.
Message ID <de8d9a4ba73002af9d29.1491321414@mimosa>
Download mbox | patch
Permalink /patch/19952/
State Accepted
Headers show

Comments

Yuya Nishihara - April 4, 2017, 3:56 p.m.
# HG changeset patch
# User Yuya Nishihara <yuya@tcha.org>
# Date 1491320098 -32400
#      Wed Apr 05 00:34:58 2017 +0900
# Node ID de8d9a4ba73002af9d2992a15e0a196c2fcaf217
# Parent  07edd8c2f59a8f19283d038ea3af5fa5532bd6dc
py3: have registrar process docstrings in bytes

Mixing bytes and unicode creates a mess. Do things in bytes as much as possible.

The new sysbytes() helper only takes care of ASCII characters, but avoids
raising a nasty unicode exception. This follows the same design principle as
sysstr().
Augie Fackler - April 5, 2017, 7:53 p.m.
On Wed, Apr 05, 2017 at 12:56:54AM +0900, Yuya Nishihara wrote:
> # HG changeset patch
> # User Yuya Nishihara <yuya@tcha.org>
> # Date 1491320098 -32400
> #      Wed Apr 05 00:34:58 2017 +0900
> # Node ID de8d9a4ba73002af9d2992a15e0a196c2fcaf217
> # Parent  07edd8c2f59a8f19283d038ea3af5fa5532bd6dc
> py3: have registrar process docstrings in bytes

Queued, thanks.

>
> Mixing bytes and unicode creates a mess. Do things in bytes as much as possible.
>
> The new sysbytes() helper only takes care of ASCII characters, but avoids
> raising a nasty unicode exception. This follows the same design principle as
> sysstr().
>
> diff --git a/hgext/show.py b/hgext/show.py
> --- a/hgext/show.py
> +++ b/hgext/show.py
> @@ -19,6 +19,7 @@ from mercurial import (
>      cmdutil,
>      commands,
>      error,
> +    pycompat,
>      registrar,
>  )
>
> @@ -133,5 +134,5 @@ def showbookmarks(ui, repo, fm):
>  # TODO make this more robust.
>  longest = max(map(len, showview._table.keys()))
>  for key in sorted(showview._table.keys()):
> -    cmdtable['show'][0].__doc__ += ' %s   %s\n' % (
> -        key.ljust(longest), showview._table[key]._origdoc)
> +    cmdtable['show'][0].__doc__ += pycompat.sysstr(' %s   %s\n' % (
> +        key.ljust(longest), showview._table[key]._origdoc))
> diff --git a/mercurial/pycompat.py b/mercurial/pycompat.py
> --- a/mercurial/pycompat.py
> +++ b/mercurial/pycompat.py
> @@ -142,6 +142,14 @@ if ispy3:
>          """Iterate bytes as if it were a str object of Python 2"""
>          return map(bytechr, s)
>
> +    def sysbytes(s):
> +        """Convert an internal str (e.g. keyword, __doc__) back to bytes
> +
> +        This never raises UnicodeEncodeError, but only ASCII characters
> +        can be round-trip by sysstr(sysbytes(s)).
> +        """
> +        return s.encode(u'utf-8')
> +
>      def sysstr(s):
>          """Return a keyword str to be passed to Python functions such as
>          getattr() and str.encode()
> @@ -210,6 +218,7 @@ else:
>      bytechr = chr
>      bytestr = str
>      iterbytestr = iter
> +    sysbytes = identity
>      sysstr = identity
>
>      # Partial backport from os.py in Python 3, which only accepts bytes.
> diff --git a/mercurial/registrar.py b/mercurial/registrar.py
> --- a/mercurial/registrar.py
> +++ b/mercurial/registrar.py
> @@ -56,9 +56,9 @@ class _funcregistrarbase(object):
>              raise error.ProgrammingError(msg)
>
>          if func.__doc__ and not util.safehasattr(func, '_origdoc'):
> -            doc = func.__doc__.strip()
> +            doc = pycompat.sysbytes(func.__doc__).strip()
>              func._origdoc = doc
> -            func.__doc__ = self._formatdoc(decl, doc)
> +            func.__doc__ = pycompat.sysstr(self._formatdoc(decl, doc))
>
>          self._table[name] = func
>          self._extrasetup(name, func, *args, **kwargs)
> @@ -127,7 +127,7 @@ class revsetpredicate(_funcregistrarbase
>      Otherwise, explicit 'revset.loadpredicate()' is needed.
>      """
>      _getname = _funcregistrarbase._parsefuncdecl
> -    _docformat = pycompat.sysstr("``%s``\n    %s")
> +    _docformat = "``%s``\n    %s"
>
>      def _extrasetup(self, name, func, safe=False, takeorder=False):
>          func._safe = safe
> @@ -166,7 +166,7 @@ class filesetpredicate(_funcregistrarbas
>      Otherwise, explicit 'fileset.loadpredicate()' is needed.
>      """
>      _getname = _funcregistrarbase._parsefuncdecl
> -    _docformat = pycompat.sysstr("``%s``\n    %s")
> +    _docformat = "``%s``\n    %s"
>
>      def _extrasetup(self, name, func, callstatus=False, callexisting=False):
>          func._callstatus = callstatus
> @@ -175,7 +175,7 @@ class filesetpredicate(_funcregistrarbas
>  class _templateregistrarbase(_funcregistrarbase):
>      """Base of decorator to register functions as template specific one
>      """
> -    _docformat = pycompat.sysstr(":%s: %s")
> +    _docformat = ":%s: %s"
>
>  class templatekeyword(_templateregistrarbase):
>      """Decorator to register template keyword
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel

Patch

diff --git a/hgext/show.py b/hgext/show.py
--- a/hgext/show.py
+++ b/hgext/show.py
@@ -19,6 +19,7 @@  from mercurial import (
     cmdutil,
     commands,
     error,
+    pycompat,
     registrar,
 )
 
@@ -133,5 +134,5 @@  def showbookmarks(ui, repo, fm):
 # TODO make this more robust.
 longest = max(map(len, showview._table.keys()))
 for key in sorted(showview._table.keys()):
-    cmdtable['show'][0].__doc__ += ' %s   %s\n' % (
-        key.ljust(longest), showview._table[key]._origdoc)
+    cmdtable['show'][0].__doc__ += pycompat.sysstr(' %s   %s\n' % (
+        key.ljust(longest), showview._table[key]._origdoc))
diff --git a/mercurial/pycompat.py b/mercurial/pycompat.py
--- a/mercurial/pycompat.py
+++ b/mercurial/pycompat.py
@@ -142,6 +142,14 @@  if ispy3:
         """Iterate bytes as if it were a str object of Python 2"""
         return map(bytechr, s)
 
+    def sysbytes(s):
+        """Convert an internal str (e.g. keyword, __doc__) back to bytes
+
+        This never raises UnicodeEncodeError, but only ASCII characters
+        can be round-trip by sysstr(sysbytes(s)).
+        """
+        return s.encode(u'utf-8')
+
     def sysstr(s):
         """Return a keyword str to be passed to Python functions such as
         getattr() and str.encode()
@@ -210,6 +218,7 @@  else:
     bytechr = chr
     bytestr = str
     iterbytestr = iter
+    sysbytes = identity
     sysstr = identity
 
     # Partial backport from os.py in Python 3, which only accepts bytes.
diff --git a/mercurial/registrar.py b/mercurial/registrar.py
--- a/mercurial/registrar.py
+++ b/mercurial/registrar.py
@@ -56,9 +56,9 @@  class _funcregistrarbase(object):
             raise error.ProgrammingError(msg)
 
         if func.__doc__ and not util.safehasattr(func, '_origdoc'):
-            doc = func.__doc__.strip()
+            doc = pycompat.sysbytes(func.__doc__).strip()
             func._origdoc = doc
-            func.__doc__ = self._formatdoc(decl, doc)
+            func.__doc__ = pycompat.sysstr(self._formatdoc(decl, doc))
 
         self._table[name] = func
         self._extrasetup(name, func, *args, **kwargs)
@@ -127,7 +127,7 @@  class revsetpredicate(_funcregistrarbase
     Otherwise, explicit 'revset.loadpredicate()' is needed.
     """
     _getname = _funcregistrarbase._parsefuncdecl
-    _docformat = pycompat.sysstr("``%s``\n    %s")
+    _docformat = "``%s``\n    %s"
 
     def _extrasetup(self, name, func, safe=False, takeorder=False):
         func._safe = safe
@@ -166,7 +166,7 @@  class filesetpredicate(_funcregistrarbas
     Otherwise, explicit 'fileset.loadpredicate()' is needed.
     """
     _getname = _funcregistrarbase._parsefuncdecl
-    _docformat = pycompat.sysstr("``%s``\n    %s")
+    _docformat = "``%s``\n    %s"
 
     def _extrasetup(self, name, func, callstatus=False, callexisting=False):
         func._callstatus = callstatus
@@ -175,7 +175,7 @@  class filesetpredicate(_funcregistrarbas
 class _templateregistrarbase(_funcregistrarbase):
     """Base of decorator to register functions as template specific one
     """
-    _docformat = pycompat.sysstr(":%s: %s")
+    _docformat = ":%s: %s"
 
 class templatekeyword(_templateregistrarbase):
     """Decorator to register template keyword