Patchwork [1,of,2,STABLE,V2] fancyopts: add early-options parser compatible with getopt()

login
register
mail settings
Submitter Yuya Nishihara
Date Nov. 24, 2017, 3:58 p.m.
Message ID <78d25e7afffe6ce150da.1511539085@mimosa>
Download mbox | patch
Permalink /patch/25736/
State Accepted
Headers show

Comments

Yuya Nishihara - Nov. 24, 2017, 3:58 p.m.
# HG changeset patch
# User Yuya Nishihara <yuya@tcha.org>
# Date 1511442293 -32400
#      Thu Nov 23 22:04:53 2017 +0900
# Branch stable
# Node ID 78d25e7afffe6ce150da8b1ca32418d6394b0442
# Parent  02845f7441aff30bc01975a5881cabfa922c12d4
fancyopts: add early-options parser compatible with getopt()

The next patch will add a flag for strict parsing of early options, where
we'll have to parse all early options at once instead of processing them
one-by-one by dispatch._earlygetopt(). That's why I decided to hook
fancyopts().

All dispatch._early*opt() functions is planned to be replaced with this
function. But in this stable series, only the strict mode will be handled
by fancyopts.earlygetopt().

Patch

diff --git a/mercurial/fancyopts.py b/mercurial/fancyopts.py
--- a/mercurial/fancyopts.py
+++ b/mercurial/fancyopts.py
@@ -7,6 +7,8 @@ 
 
 from __future__ import absolute_import
 
+import functools
+
 from .i18n import _
 from . import (
     error,
@@ -24,6 +26,179 @@  nevernegate = {
     'version',
 }
 
+def _earlyoptarg(arg, shortlist, namelist):
+    """Check if the given arg is a valid unabbreviated option
+
+    Returns (flag_str, has_embedded_value?, embedded_value, takes_value?)
+
+    >>> def opt(arg):
+    ...     return _earlyoptarg(arg, b'R:q', [b'cwd=', b'debugger'])
+
+    long form:
+
+    >>> opt(b'--cwd')
+    ('--cwd', False, '', True)
+    >>> opt(b'--cwd=')
+    ('--cwd', True, '', True)
+    >>> opt(b'--cwd=foo')
+    ('--cwd', True, 'foo', True)
+    >>> opt(b'--debugger')
+    ('--debugger', False, '', False)
+    >>> opt(b'--debugger=')  # invalid but parsable
+    ('--debugger', True, '', False)
+
+    short form:
+
+    >>> opt(b'-R')
+    ('-R', False, '', True)
+    >>> opt(b'-Rfoo')
+    ('-R', True, 'foo', True)
+    >>> opt(b'-q')
+    ('-q', False, '', False)
+    >>> opt(b'-qfoo')  # invalid but parsable
+    ('-q', True, 'foo', False)
+
+    unknown or invalid:
+
+    >>> opt(b'--unknown')
+    ('', False, '', False)
+    >>> opt(b'-u')
+    ('', False, '', False)
+    >>> opt(b'-ufoo')
+    ('', False, '', False)
+    >>> opt(b'--')
+    ('', False, '', False)
+    >>> opt(b'-')
+    ('', False, '', False)
+    >>> opt(b'-:')
+    ('', False, '', False)
+    >>> opt(b'-:foo')
+    ('', False, '', False)
+    """
+    if arg.startswith('--'):
+        flag, eq, val = arg.partition('=')
+        if flag[2:] in namelist:
+            return flag, bool(eq), val, False
+        if flag[2:] + '=' in namelist:
+            return flag, bool(eq), val, True
+    elif arg.startswith('-') and arg != '-' and not arg.startswith('-:'):
+        flag, val = arg[:2], arg[2:]
+        i = shortlist.find(flag[1:])
+        if i >= 0:
+            return flag, bool(val), val, shortlist.startswith(':', i + 1)
+    return '', False, '', False
+
+def earlygetopt(args, shortlist, namelist, gnu=False, keepsep=False):
+    """Parse options like getopt, but ignores unknown options and abbreviated
+    forms
+
+    If gnu=False, this stops processing options as soon as a non/unknown-option
+    argument is encountered. Otherwise, option and non-option arguments may be
+    intermixed, and unknown-option arguments are taken as non-option.
+
+    If keepsep=True, '--' won't be removed from the list of arguments left.
+    This is useful for stripping early options from a full command arguments.
+
+    >>> def get(args, gnu=False, keepsep=False):
+    ...     return earlygetopt(args, b'R:q', [b'cwd=', b'debugger'],
+    ...                        gnu=gnu, keepsep=keepsep)
+
+    default parsing rules for early options:
+
+    >>> get([b'x', b'--cwd', b'foo', b'-Rbar', b'-q', b'y'], gnu=True)
+    ([('--cwd', 'foo'), ('-R', 'bar'), ('-q', '')], ['x', 'y'])
+    >>> get([b'x', b'--cwd=foo', b'y', b'-R', b'bar', b'--debugger'], gnu=True)
+    ([('--cwd', 'foo'), ('-R', 'bar'), ('--debugger', '')], ['x', 'y'])
+    >>> get([b'--unknown', b'--cwd=foo', b'--', '--debugger'], gnu=True)
+    ([('--cwd', 'foo')], ['--unknown', '--debugger'])
+
+    restricted parsing rules (early options must come first):
+
+    >>> get([b'--cwd', b'foo', b'-Rbar', b'x', b'-q', b'y'], gnu=False)
+    ([('--cwd', 'foo'), ('-R', 'bar')], ['x', '-q', 'y'])
+    >>> get([b'--cwd=foo', b'x', b'y', b'-R', b'bar', b'--debugger'], gnu=False)
+    ([('--cwd', 'foo')], ['x', 'y', '-R', 'bar', '--debugger'])
+    >>> get([b'--unknown', b'--cwd=foo', b'--', '--debugger'], gnu=False)
+    ([], ['--unknown', '--cwd=foo', '--debugger'])
+
+    stripping early options (without loosing '--'):
+
+    >>> get([b'x', b'-Rbar', b'--', '--debugger'], gnu=True, keepsep=True)[1]
+    ['x', '--', '--debugger']
+
+    last argument:
+
+    >>> get([b'--cwd'])
+    ([], ['--cwd'])
+    >>> get([b'--cwd=foo'])
+    ([('--cwd', 'foo')], [])
+    >>> get([b'-R'])
+    ([], ['-R'])
+    >>> get([b'-Rbar'])
+    ([('-R', 'bar')], [])
+    >>> get([b'-q'])
+    ([('-q', '')], [])
+    >>> get([b'-q', b'--'])
+    ([('-q', '')], [])
+
+    value passed to bool options:
+
+    >>> get([b'--debugger=foo', b'x'])
+    ([], ['--debugger=foo', 'x'])
+    >>> get([b'-qfoo', b'x'])
+    ([], ['-qfoo', 'x'])
+
+    short option isn't separated with '=':
+
+    >>> get([b'-R=bar'])
+    ([('-R', '=bar')], [])
+
+    ':' may be in shortlist, but shouldn't be taken as an option letter:
+
+    >>> get([b'-:', b'y'])
+    ([], ['-:', 'y'])
+
+    '-' is a valid non-option argument:
+
+    >>> get([b'-', b'y'])
+    ([], ['-', 'y'])
+    """
+    # ignoring everything just after '--' isn't correct as '--' may be an
+    # option value (e.g. ['-R', '--']), but we do that consistently.
+    try:
+        argcount = args.index('--')
+    except ValueError:
+        argcount = len(args)
+
+    parsedopts = []
+    parsedargs = []
+    pos = 0
+    while pos < argcount:
+        arg = args[pos]
+        flag, hasval, val, takeval = _earlyoptarg(arg, shortlist, namelist)
+        if not hasval and takeval and pos + 1 >= argcount:
+            # missing last argument
+            break
+        if not flag or hasval and not takeval:
+            # non-option argument or -b/--bool=INVALID_VALUE
+            if gnu:
+                parsedargs.append(arg)
+                pos += 1
+            else:
+                break
+        elif hasval == takeval:
+            # -b/--bool or -s/--str=VALUE
+            parsedopts.append((flag, val))
+            pos += 1
+        else:
+            # -s/--str VALUE
+            parsedopts.append((flag, args[pos + 1]))
+            pos += 2
+
+    parsedargs.extend(args[pos:argcount])
+    parsedargs.extend(args[argcount + (not keepsep):])
+    return parsedopts, parsedargs
+
 def gnugetopt(args, options, longoptions):
     """Parse options mostly like getopt.gnu_getopt.
 
@@ -51,7 +226,7 @@  def gnugetopt(args, options, longoptions
     return opts, args
 
 
-def fancyopts(args, options, state, gnu=False):
+def fancyopts(args, options, state, gnu=False, early=False):
     """
     read args, parse options, and store options in state
 
@@ -124,7 +299,9 @@  def fancyopts(args, options, state, gnu=
             namelist.append(oname)
 
     # parse arguments
-    if gnu:
+    if early:
+        parse = functools.partial(earlygetopt, gnu=gnu)
+    elif gnu:
         parse = gnugetopt
     else:
         parse = pycompat.getoptb
diff --git a/tests/test-doctest.py b/tests/test-doctest.py
--- a/tests/test-doctest.py
+++ b/tests/test-doctest.py
@@ -48,6 +48,7 @@  testmod('mercurial.context')
 testmod('mercurial.dagparser', optionflags=doctest.NORMALIZE_WHITESPACE)
 testmod('mercurial.dispatch')
 testmod('mercurial.encoding')
+testmod('mercurial.fancyopts')
 testmod('mercurial.formatter')
 testmod('mercurial.hg')
 testmod('mercurial.hgweb.hgwebdir_mod')