Patchwork [2,of,5] revset: move tagging of alias arguments from tokenization to parsing phase

login
register
mail settings
Submitter Yuya Nishihara
Date March 29, 2016, 3:27 p.m.
Message ID <fa7880cac50272f805ae.1459265267@mimosa>
Download mbox | patch
Permalink /patch/14140/
State Accepted
Headers show

Comments

Yuya Nishihara - March 29, 2016, 3:27 p.m.
# HG changeset patch
# User Yuya Nishihara <yuya@tcha.org>
# Date 1455446913 -32400
#      Sun Feb 14 19:48:33 2016 +0900
# Node ID fa7880cac50272f805ae1a233a477b0a9f56a585
# Parent  52999dfc8c449e221709b537e35cdac144ace7d9
revset: move tagging of alias arguments from tokenization to parsing phase

In short, this patch moves the hack from tokenizedefn() to _relabelaliasargs(),
which is called after parsing. This change aims to eliminate tight dependency
on the revset tokenizer.

Before this patch, we had to rewrite an alias argument to a pseudo function:

  "$1" -> "_aliasarg('$1')"
  ('symbol', '$1') -> ('function', ('symbol', '_aliasarg'), ('string', '$1'))

This was because the tokenizer must generate tokens that are syntactically
valid. By moving the process to the parsing phase, we can assign a unique tag
to an alias argument.

  ('symbol', '$1') -> ('_aliasarg', '$1')

Since new _aliasarg node never be generated from a user input, we no longer
have to verify a user input at findaliases(). The test for _aliasarg("$1") is
removed as it is syntactically valid and should pass the parsing phase.

Patch

diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -2261,14 +2261,10 @@  def optimize(x, small):
         return w + wa, (op, x[1], ta)
     return 1, x
 
-_aliasarg = ('func', ('symbol', '_aliasarg'))
 def _getaliasarg(tree):
-    """If tree matches ('func', ('symbol', '_aliasarg'), ('string', X))
-    return X, None otherwise.
-    """
-    if (len(tree) == 3 and tree[:2] == _aliasarg
-        and tree[2][0] == 'string'):
-        return tree[2][1]
+    """If tree matches ('_aliasarg', X) return X, None otherwise"""
+    if tree[0] == '_aliasarg':
+        return tree[1]
     return None
 
 def _checkaliasarg(tree, known=None):
@@ -2369,70 +2365,64 @@  def _parsealiasdecl(decl):
     except error.ParseError as inst:
         return (decl, None, None, parseerrordetail(inst))
 
+def _relabelaliasargs(tree, args):
+    if not isinstance(tree, tuple):
+        return tree
+    op = tree[0]
+    if op != 'symbol':
+        return (op,) + tuple(_relabelaliasargs(x, args) for x in tree[1:])
+
+    assert len(tree) == 2
+    sym = tree[1]
+    if sym in args:
+        op = '_aliasarg'
+    elif sym.startswith('$'):
+        raise error.ParseError(_("'$' not for alias arguments"))
+    return (op, sym)
+
 def _parsealiasdefn(defn, args):
     """Parse alias definition ``defn``
 
-    This function also replaces alias argument references in the
-    specified definition by ``_aliasarg(ARGNAME)``.
+    This function marks alias argument references as ``_aliasarg``.
 
     ``args`` is a list of alias argument names, or None if the alias
     is declared as a symbol.
 
     This returns "tree" as parsing result.
 
+    >>> def prettyformat(tree):
+    ...     return parser.prettyformat(tree, ('_aliasarg', 'string', 'symbol'))
     >>> args = ['$1', '$2', 'foo']
     >>> print prettyformat(_parsealiasdefn('$1 or foo', args))
     (or
-      (func
-        ('symbol', '_aliasarg')
-        ('string', '$1'))
-      (func
-        ('symbol', '_aliasarg')
-        ('string', 'foo')))
+      ('_aliasarg', '$1')
+      ('_aliasarg', 'foo'))
     >>> try:
     ...     _parsealiasdefn('$1 or $bar', args)
     ... except error.ParseError, inst:
     ...     print parseerrordetail(inst)
-    at 6: '$' not for alias arguments
+    '$' not for alias arguments
     >>> args = ['$1', '$10', 'foo']
     >>> print prettyformat(_parsealiasdefn('$10 or foobar', args))
     (or
-      (func
-        ('symbol', '_aliasarg')
-        ('string', '$10'))
+      ('_aliasarg', '$10')
       ('symbol', 'foobar'))
     >>> print prettyformat(_parsealiasdefn('"$1" or "foo"', args))
     (or
       ('string', '$1')
       ('string', 'foo'))
     """
-    def tokenizedefn(program, lookup=None):
-        if args:
-            argset = set(args)
-        else:
-            argset = set()
-
-        for t, value, pos in _tokenizealias(program, lookup=lookup):
-            if t == 'symbol':
-                if value in argset:
-                    # emulate tokenization of "_aliasarg('ARGNAME')":
-                    # "_aliasarg()" is an unknown symbol only used separate
-                    # alias argument placeholders from regular strings.
-                    yield ('symbol', '_aliasarg', pos)
-                    yield ('(', None, pos)
-                    yield ('string', value, pos)
-                    yield (')', None, pos)
-                    continue
-                elif value.startswith('$'):
-                    raise error.ParseError(_("'$' not for alias arguments"),
-                                           pos)
-            yield (t, value, pos)
+    if args:
+        args = set(args)
+    else:
+        args = set()
 
     p = parser.parser(elements)
-    tree, pos = p.parse(tokenizedefn(defn))
+    tree, pos = p.parse(_tokenizealias(defn))
     if pos != len(defn):
         raise error.ParseError(_('invalid token'), pos)
-    return parser.simplifyinfixops(tree, ('list', 'or'))
+    tree = parser.simplifyinfixops(tree, ('list', 'or'))
+    return _relabelaliasargs(tree, args)
 
 class revsetalias(object):
     # whether own `error` information is already shown or not.
@@ -2523,7 +2513,6 @@  def _expandaliases(aliases, tree, expand
     return result
 
 def findaliases(ui, tree, showwarning=None):
-    _checkaliasarg(tree)
     aliases = {}
     for k, v in ui.configitems('revsetalias'):
         alias = revsetalias(k, v)
diff --git a/tests/test-revset.t b/tests/test-revset.t
--- a/tests/test-revset.t
+++ b/tests/test-revset.t
@@ -1820,25 +1820,12 @@  but 'all()' should never be substituded 
     <spanset+ 0:9>>
   0
 
-  $ echo 'injectparamasstring2 = max(_aliasarg("$1"))' >> .hg/hgrc
-  $ echo 'callinjection2($1) = descendants(injectparamasstring2)' >> .hg/hgrc
-  $ try 'callinjection2(2:5)'
-  (func
-    ('symbol', 'callinjection2')
-    (range
-      ('symbol', '2')
-      ('symbol', '5')))
-  abort: failed to parse the definition of revset alias "injectparamasstring2": unknown identifier: _aliasarg
-  [255]
   $ hg debugrevspec --debug --config revsetalias.anotherbadone='branch(' "tip"
   ('symbol', 'tip')
   warning: failed to parse the definition of revset alias "anotherbadone": at 7: not a prefix: end
-  warning: failed to parse the definition of revset alias "injectparamasstring2": unknown identifier: _aliasarg
   * set:
   <baseset [9]>
   9
-  >>> data = file('.hg/hgrc', 'rb').read()
-  >>> file('.hg/hgrc', 'wb').write(data.replace('_aliasarg', ''))
 
   $ try 'tip'
   ('symbol', 'tip')