Patchwork [3,of,9,V2] encoding: use unicode literal for argument to encode()

login
register
mail settings
Submitter Gregory Szorc
Date June 25, 2016, 9:11 p.m.
Message ID <8701e580adfc62a91241.1466889077@ubuntu-vm-main>
Download mbox | patch
Permalink /patch/15616/
State Superseded
Headers show

Comments

Gregory Szorc - June 25, 2016, 9:11 p.m.
# HG changeset patch
# User Gregory Szorc <gregory.szorc@gmail.com>
# Date 1466880655 25200
#      Sat Jun 25 11:50:55 2016 -0700
# Node ID 8701e580adfc62a91241440d4a9a6b524ad48996
# Parent  6ea0378d4330a6092f01dfd073757f029ce5060b
encoding: use unicode literal for argument to encode()

Python 3 raises a TypeError when a bytes instance is passed to
encode(), which is what happens after string literals are converted
to bytes literals via our custom module loader on Python 3.

In theory, we could teach our source transformer to recognize
encode(<literal>) and decode(<literal>) and rewrite them intelligently.
However, I feel somewhat strongly that source transformation should
be consistent and not have one-offs, as this keeps things simple
both for source transformation and for humans hacking on the source
code.

Patch

diff --git a/mercurial/encoding.py b/mercurial/encoding.py
--- a/mercurial/encoding.py
+++ b/mercurial/encoding.py
@@ -18,17 +18,17 @@  from . import (
 )
 
 if sys.version_info[0] >= 3:
     unichr = chr
 
 # These unicode characters are ignored by HFS+ (Apple Technote 1150,
 # "Unicode Subtleties"), so we need to ignore them in some places for
 # sanity.
-_ignore = [unichr(int(x, 16)).encode("utf-8") for x in
+_ignore = [unichr(int(x, 16)).encode(u'utf-8') for x in
            "200c 200d 200e 200f 202a 202b 202c 202d 202e "
            "206a 206b 206c 206d 206e 206f feff".split()]
 # verify the next function will work
 if sys.version_info[0] >= 3:
     assert set(i[0] for i in _ignore) == set([ord(b'\xe2'), ord(b'\xef')])
 else:
     assert set(i[0] for i in _ignore) == set(["\xe2", "\xef"])
 
diff --git a/tests/test-check-py3-compat.t b/tests/test-check-py3-compat.t
--- a/tests/test-check-py3-compat.t
+++ b/tests/test-check-py3-compat.t
@@ -19,99 +19,99 @@ 
   tests/md5sum.py not using absolute_import
   tests/readlink.py not using absolute_import
   tests/run-tests.py not using absolute_import
   tests/test-demandimport.py not using absolute_import
 
 #if py3exe
   $ hg files 'set:(**.py)' | sed 's|\\|/|g' | xargs $PYTHON3 contrib/check-py3-compat.py
   doc/hgmanpage.py: invalid syntax: invalid syntax (<unknown>, line *) (glob)
-  hgext/acl.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/automv.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/blackbox.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
+  hgext/acl.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/automv.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/blackbox.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
   hgext/bugzilla.py: error importing module: <ImportError> No module named 'urlparse' (line *) (glob)
-  hgext/censor.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
+  hgext/censor.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
   hgext/chgserver.py: error importing module: <ImportError> No module named 'SocketServer' (line *) (glob)
-  hgext/children.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/churn.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/clonebundles.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
+  hgext/children.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/churn.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/clonebundles.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
   hgext/color.py: invalid syntax: invalid syntax (<unknown>, line *) (glob)
-  hgext/convert/bzr.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/convert/common.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/convert/convcmd.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/convert/cvs.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/convert/cvsps.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/convert/darcs.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/convert/filemap.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/convert/git.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/convert/gnuarch.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/convert/hg.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/convert/monotone.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/convert/p4.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/convert/subversion.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
+  hgext/convert/bzr.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/convert/common.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/convert/convcmd.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/convert/cvs.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/convert/cvsps.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/convert/darcs.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/convert/filemap.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/convert/git.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/convert/gnuarch.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/convert/hg.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/convert/monotone.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/convert/p4.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/convert/subversion.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
   hgext/convert/transport.py: error importing module: <ImportError> No module named 'svn.client' (line *) (glob)
-  hgext/eol.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/extdiff.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/factotum.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/fetch.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/fsmonitor/state.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/fsmonitor/watchmanclient.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/gpg.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/graphlog.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/hgk.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/highlight/highlight.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/histedit.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/keyword.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/largefiles/basestore.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/largefiles/lfcommands.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/largefiles/lfutil.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/largefiles/localstore.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/largefiles/overrides.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/largefiles/proto.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/largefiles/remotestore.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/largefiles/reposetup.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/largefiles/storefactory.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/largefiles/uisetup.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
+  hgext/eol.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/extdiff.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/factotum.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/fetch.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/fsmonitor/state.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/fsmonitor/watchmanclient.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/gpg.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/graphlog.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/hgk.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/highlight/highlight.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/histedit.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/keyword.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/largefiles/basestore.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/largefiles/lfcommands.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/largefiles/lfutil.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/largefiles/localstore.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/largefiles/overrides.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/largefiles/proto.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/largefiles/remotestore.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/largefiles/reposetup.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/largefiles/storefactory.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/largefiles/uisetup.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
   hgext/largefiles/wirestore.py: error importing module: <SystemError> Parent module 'hgext.largefiles' not loaded, cannot perform relative import (line *) (glob)
-  hgext/mq.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/notify.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/pager.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/patchbomb.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/purge.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/rebase.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/record.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/relink.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/schemes.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/share.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/shelve.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/strip.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/transplant.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/win32mbcs.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  hgext/win32text.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
+  hgext/mq.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/notify.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/pager.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/patchbomb.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/purge.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/rebase.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/record.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/relink.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/schemes.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/share.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/shelve.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/strip.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/transplant.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/win32mbcs.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  hgext/win32text.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
   mercurial/archival.py: invalid syntax: invalid syntax (<unknown>, line *) (glob)
-  mercurial/bookmarks.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  mercurial/branchmap.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
+  mercurial/bookmarks.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  mercurial/branchmap.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
   mercurial/bundle2.py: invalid syntax: invalid syntax (<unknown>, line *) (glob)
-  mercurial/bundlerepo.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  mercurial/byterange.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  mercurial/changegroup.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  mercurial/changelog.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  mercurial/cmdutil.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
+  mercurial/bundlerepo.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  mercurial/byterange.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  mercurial/changegroup.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  mercurial/changelog.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  mercurial/cmdutil.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
   mercurial/commands.py: invalid syntax: invalid syntax (<unknown>, line *) (glob)
   mercurial/commandserver.py: error importing module: <ImportError> No module named 'SocketServer' (line *) (glob)
-  mercurial/config.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  mercurial/context.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  mercurial/copies.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  mercurial/crecord.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  mercurial/dagparser.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  mercurial/dagutil.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  mercurial/destutil.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  mercurial/dirstate.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  mercurial/discovery.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
-  mercurial/dispatch.py: error importing: <TypeError> encode() argument 1 must be str, not bytes (error at encoding.py:*) (glob)
+  mercurial/config.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  mercurial/context.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  mercurial/copies.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  mercurial/crecord.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  mercurial/dagparser.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  mercurial/dagutil.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  mercurial/destutil.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  mercurial/dirstate.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  mercurial/discovery.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
+  mercurial/dispatch.py: error importing: <TypeError> str expected, not bytes (error at encoding.py:*) (glob)
   mercurial/exchange.py: error importing: <TypeError> getattr(): attribute name must be string (error at i18n.py:*) (glob)
   mercurial/extensions.py: error importing: <TypeError> getattr(): attribute name must be string (error at i18n.py:*) (glob)
   mercurial/fancyopts.py: error importing: <TypeError> getattr(): attribute name must be string (error at i18n.py:*) (glob)
   mercurial/filelog.py: error importing: <TypeError> getattr(): attribute name must be string (error at i18n.py:*) (glob)
   mercurial/filemerge.py: error importing: <TypeError> getattr(): attribute name must be string (error at i18n.py:*) (glob)
   mercurial/fileset.py: error importing: <TypeError> getattr(): attribute name must be string (error at i18n.py:*) (glob)
   mercurial/formatter.py: error importing: <TypeError> getattr(): attribute name must be string (error at i18n.py:*) (glob)
   mercurial/graphmod.py: error importing: <TypeError> getattr(): attribute name must be string (error at i18n.py:*) (glob)