Submitter | Pulkit Goyal |
---|---|
Date | Dec. 8, 2016, 7:12 p.m. |
Message ID | <e6e1c531a879c091caea.1481224357@pulkit-goyal> |
Download | mbox | patch |
Permalink | /patch/17878/ |
State | Superseded |
Headers | show |
Comments
On Fri, 09 Dec 2016 00:42:37 +0530, Pulkit Goyal wrote: > # HG changeset patch > # User Pulkit Goyal <7895pulkit@gmail.com> > # Date 1480986396 -19800 > # Tue Dec 06 06:36:36 2016 +0530 > # Node ID e6e1c531a879c091caeaf7597744e98bcfbb41c9 > # Parent a2b053b8d31aa01b1dcae2d3001b060ff59e8a68 > py3: make a bytes version of getopt.getopt() > + # getopt.getopt() on Python 3 deals with unicodes internally so we cannot > + # pass bytes there. Passing unicodes will result in unicodes as return > + # values which we need to convert again to bytes. > + def getoptb(args, shortlist, namelist): > + # There are chances when args, shorlist or namelist variables can be > + # unicodes, because maybe they are result of sys.argv like in statprof > + # or some other reasons. So it's better to check instance rather than > + # getting an AttributeError. > + args = [a.decode('latin-1') if isinstance(a, bytes) else a > + for a in args] > + if isinstance(shortlist, bytes): > + shortlist = shortlist.decode('latin-1') > + namelist = [a.decode('latin-1') if isinstance(a, bytes) else a > + for a in namelist] IMO, passing unicode variables is invalid use of getoptb(). If they were unicode type, it would be wrong to convert them back by .encode('latin-1'). We have no idea what's the expected encoding. > + opts, args = getopt.getopt(args, shortlist, namelist) > + # Returned value is always str, so no need to check instance. > + opts = [(a[0].enocde('latin-1'), a[1].enocde('latin-1')) > + for a in opts] > + args = [a.encode('latin-1') for a in args] > + return opts, args
On Fri, Dec 9, 2016 at 4:16 PM, Yuya Nishihara <yuya@tcha.org> wrote: > On Fri, 09 Dec 2016 00:42:37 +0530, Pulkit Goyal wrote: >> # HG changeset patch >> # User Pulkit Goyal <7895pulkit@gmail.com> >> # Date 1480986396 -19800 >> # Tue Dec 06 06:36:36 2016 +0530 >> # Node ID e6e1c531a879c091caeaf7597744e98bcfbb41c9 >> # Parent a2b053b8d31aa01b1dcae2d3001b060ff59e8a68 >> py3: make a bytes version of getopt.getopt() > >> + # getopt.getopt() on Python 3 deals with unicodes internally so we cannot >> + # pass bytes there. Passing unicodes will result in unicodes as return >> + # values which we need to convert again to bytes. >> + def getoptb(args, shortlist, namelist): >> + # There are chances when args, shorlist or namelist variables can be >> + # unicodes, because maybe they are result of sys.argv like in statprof >> + # or some other reasons. So it's better to check instance rather than >> + # getting an AttributeError. >> + args = [a.decode('latin-1') if isinstance(a, bytes) else a >> + for a in args] >> + if isinstance(shortlist, bytes): >> + shortlist = shortlist.decode('latin-1') >> + namelist = [a.decode('latin-1') if isinstance(a, bytes) else a >> + for a in namelist] > > IMO, passing unicode variables is invalid use of getoptb(). If they were > unicode type, it would be wrong to convert them back by .encode('latin-1'). > We have no idea what's the expected encoding. Okay, that means getoptb() is espected to get bytes only and since we will be passing bytes, we can safely use .encode('latin-1') because we decoded them the same way before passing into getopt.getopt(). I will resend a V4. >> + opts, args = getopt.getopt(args, shortlist, namelist) >> + # Returned value is always str, so no need to check instance. >> + opts = [(a[0].enocde('latin-1'), a[1].enocde('latin-1')) >> + for a in opts] >> + args = [a.encode('latin-1') for a in args] >> + return opts, args
Patch
diff -r a2b053b8d31a -r e6e1c531a879 mercurial/fancyopts.py --- a/mercurial/fancyopts.py Tue Dec 06 11:44:49 2016 +0000 +++ b/mercurial/fancyopts.py Tue Dec 06 06:36:36 2016 +0530 @@ -7,10 +7,11 @@ from __future__ import absolute_import -import getopt - from .i18n import _ -from . import error +from . import ( + error, + pycompat, +) # Set of flags to not apply boolean negation logic on nevernegate = set([ @@ -34,13 +35,14 @@ stopindex = args.index('--') extraargs = args[stopindex + 1:] args = args[:stopindex] - opts, parseargs = getopt.getopt(args, options, longoptions) + opts, parseargs = pycompat.getoptb(args, options, longoptions) args = [] while parseargs: arg = parseargs.pop(0) if arg and arg[0] == '-' and len(arg) > 1: parseargs.insert(0, arg) - topts, newparseargs = getopt.getopt(parseargs, options, longoptions) + topts, newparseargs = pycompat.getoptb(parseargs,\ + options, longoptions) opts = opts + topts parseargs = newparseargs else: @@ -125,7 +127,7 @@ if gnu: parse = gnugetopt else: - parse = getopt.getopt + parse = pycompat.getoptb opts, args = parse(args, shortlist, namelist) # transfer result to state diff -r a2b053b8d31a -r e6e1c531a879 mercurial/pycompat.py --- a/mercurial/pycompat.py Tue Dec 06 11:44:49 2016 +0000 +++ b/mercurial/pycompat.py Tue Dec 06 06:36:36 2016 +0530 @@ -10,6 +10,7 @@ from __future__ import absolute_import +import getopt import os import sys @@ -87,6 +88,27 @@ setattr = _wrapattrfunc(builtins.setattr) xrange = builtins.range + # getopt.getopt() on Python 3 deals with unicodes internally so we cannot + # pass bytes there. Passing unicodes will result in unicodes as return + # values which we need to convert again to bytes. + def getoptb(args, shortlist, namelist): + # There are chances when args, shorlist or namelist variables can be + # unicodes, because maybe they are result of sys.argv like in statprof + # or some other reasons. So it's better to check instance rather than + # getting an AttributeError. + args = [a.decode('latin-1') if isinstance(a, bytes) else a + for a in args] + if isinstance(shortlist, bytes): + shortlist = shortlist.decode('latin-1') + namelist = [a.decode('latin-1') if isinstance(a, bytes) else a + for a in namelist] + opts, args = getopt.getopt(args, shortlist, namelist) + # Returned value is always str, so no need to check instance. + opts = [(a[0].enocde('latin-1'), a[1].enocde('latin-1')) + for a in opts] + args = [a.encode('latin-1') for a in args] + return opts, args + else: def sysstr(s): return s @@ -106,6 +128,9 @@ def fsdecode(filename): return filename + def getoptb(args, shortlist, namelist): + return getopt.getopt(args, shortlist, namelist) + osname = os.name ospathsep = os.pathsep ossep = os.sep diff -r a2b053b8d31a -r e6e1c531a879 mercurial/statprof.py --- a/mercurial/statprof.py Tue Dec 06 11:44:49 2016 +0000 +++ b/mercurial/statprof.py Tue Dec 06 06:36:36 2016 +0530 @@ -116,6 +116,10 @@ import threading import time +from . import ( + pycompat, +) + defaultdict = collections.defaultdict contextmanager = contextlib.contextmanager @@ -771,7 +775,7 @@ # process options try: - opts, args = getopt.getopt(sys.argv[optstart:], "hl:f:o:p:", + opts, args = pycompat.getoptb(sys.argv[optstart:], "hl:f:o:p:", ["help", "limit=", "file=", "output-file=", "script-path="]) except getopt.error as msg: print(msg)