Patchwork [4,of,6] parser: move alias declaration parser to common rule-set class

login
register
mail settings
Submitter Yuya Nishihara
Date April 1, 2016, 3:37 p.m.
Message ID <2a3e01c30ddc08a5ff33.1459525030@mimosa>
Download mbox | patch
Permalink /patch/14225/
State Superseded
Commit 6d6201fc5aaeff8fb25463551c2511f5b30f06eb
Headers show

Comments

Yuya Nishihara - April 1, 2016, 3:37 p.m.
# HG changeset patch
# User Yuya Nishihara <yuya@tcha.org>
# Date 1456736043 -32400
#      Mon Feb 29 17:54:03 2016 +0900
# Node ID 2a3e01c30ddc08a5ff331b8d9b30ac8c014090d6
# Parent  ae143c83b4c139422da83eab3c9c3ffb5825a6e8
parser: move alias declaration parser to common rule-set class

The original _parsealiasdecl() function is split into common _builddecl()
and revset-specific _parsealiasdecl(). And the original _parsealiasdecl()
call is temporarily replaced by rules._builddecl(), which should be eliminated
later.

The doctest is kept in revset._parsealiasdecl() because it requires concrete
syntax rules.
Pierre-Yves David - April 1, 2016, 9:32 p.m.
On 04/01/2016 08:37 AM, Yuya Nishihara wrote:
> # HG changeset patch
> # User Yuya Nishihara <yuya@tcha.org>
> # Date 1456736043 -32400
> #      Mon Feb 29 17:54:03 2016 +0900
> # Node ID 2a3e01c30ddc08a5ff331b8d9b30ac8c014090d6
> # Parent  ae143c83b4c139422da83eab3c9c3ffb5825a6e8
> parser: move alias declaration parser to common rule-set class
>
> The original _parsealiasdecl() function is split into common _builddecl()
> and revset-specific _parsealiasdecl(). And the original _parsealiasdecl()
> call is temporarily replaced by rules._builddecl(), which should be eliminated
> later.
>
> The doctest is kept in revset._parsealiasdecl() because it requires concrete
> syntax rules.
>
> diff --git a/mercurial/parser.py b/mercurial/parser.py
> --- a/mercurial/parser.py
> +++ b/mercurial/parser.py
> @@ -253,3 +253,40 @@ class aliasrules(object):
>           self._getlist = getlist
>           self._symbolnode = symbolnode
>           self._funcnode = funcnode
> +
> +    def _builddecl(self, decl):

Why is this method private? It seems to be the main entry point for the 
object right now. Is this temporary?

> +        """Parse an alias declaration into ``(name, tree, args, errorstr)``
> +
> +        - ``name``: of declared alias (may be ``decl`` itself at error)
> +        - ``tree``: parse result (or ``None`` at error)
> +        - ``args``: list of argument names (or None for symbol declaration)
> +        - ``errorstr``: detail about detected error (or None)
> +        """
> +        try:
> +            tree = self._parsedecl(decl)

So all the actual syntax parsing is done in self._parsedecl, but the 
analysis of that tree is done in this method. We should probably mention it.

Should we have a very simple doctest to validate this?

> +        except error.ParseError as inst:
> +            return (decl, None, None, parseerrordetail(inst))
> +
> +        if tree[0] == self._symbolnode:
> +            # "name = ...." style
> +            name = tree[1]
> +            if name.startswith('$'):
> +                return (decl, None, None, _("'$' not for alias arguments"))
> +            return (name, tree, None, None)
> +
> +        if tree[0] == self._funcnode and tree[1][0] == self._symbolnode:
> +            # "name(arg, ....) = ...." style
> +            name = tree[1][1]
> +            if name.startswith('$'):
> +                return (decl, None, None, _("'$' not for alias arguments"))
> +            args = []
> +            for arg in self._getlist(tree[2]):
> +                if arg[0] != self._symbolnode:
> +                    return (decl, None, None, _("invalid argument list"))
> +                args.append(arg[1])
> +            if len(args) != len(set(args)):
> +                return (name, None, None,
> +                        _("argument names collide with each other"))
> +            return (name, tree[:2], args, None)
> +
> +        return (decl, None, None, _("invalid format"))
> diff --git a/mercurial/revset.py b/mercurial/revset.py
> --- a/mercurial/revset.py
> +++ b/mercurial/revset.py
> @@ -2241,75 +2241,41 @@ def _tokenizealias(program):
>   def _parsealiasdecl(decl):
>       """Parse alias declaration ``decl``
>
> -    This returns ``(name, tree, args, errorstr)`` tuple:
> -
> -    - ``name``: of declared alias (may be ``decl`` itself at error)
> -    - ``tree``: parse result (or ``None`` at error)
> -    - ``args``: list of alias argument names (or None for symbol declaration)
> -    - ``errorstr``: detail about detected error (or None)
> -
> -    >>> _parsealiasdecl('foo')
> +    >>> f = parser.aliasrules('', _parsealiasdecl, None, getlist)._builddecl
> +    >>> f('foo')
>       ('foo', ('symbol', 'foo'), None, None)
> -    >>> _parsealiasdecl('$foo')
> +    >>> f('$foo')
>       ('$foo', None, None, "'$' not for alias arguments")
> -    >>> _parsealiasdecl('foo::bar')
> +    >>> f('foo::bar')
>       ('foo::bar', None, None, 'invalid format')
> -    >>> _parsealiasdecl('foo bar')
> +    >>> f('foo bar')
>       ('foo bar', None, None, 'at 4: invalid token')
> -    >>> _parsealiasdecl('foo()')
> +    >>> f('foo()')
>       ('foo', ('func', ('symbol', 'foo')), [], None)
> -    >>> _parsealiasdecl('$foo()')
> +    >>> f('$foo()')
>       ('$foo()', None, None, "'$' not for alias arguments")
> -    >>> _parsealiasdecl('foo($1, $2)')
> +    >>> f('foo($1, $2)')
>       ('foo', ('func', ('symbol', 'foo')), ['$1', '$2'], None)
> -    >>> _parsealiasdecl('foo(bar_bar, baz.baz)')
> +    >>> f('foo(bar_bar, baz.baz)')
>       ('foo', ('func', ('symbol', 'foo')), ['bar_bar', 'baz.baz'], None)
> -    >>> _parsealiasdecl('foo($1, $2, nested($1, $2))')
> +    >>> f('foo($1, $2, nested($1, $2))')
>       ('foo($1, $2, nested($1, $2))', None, None, 'invalid argument list')
> -    >>> _parsealiasdecl('foo(bar($1, $2))')
> +    >>> f('foo(bar($1, $2))')
>       ('foo(bar($1, $2))', None, None, 'invalid argument list')
> -    >>> _parsealiasdecl('foo("string")')
> +    >>> f('foo("string")')
>       ('foo("string")', None, None, 'invalid argument list')
> -    >>> _parsealiasdecl('foo($1, $2')
> +    >>> f('foo($1, $2')
>       ('foo($1, $2', None, None, 'at 10: unexpected token: end')
> -    >>> _parsealiasdecl('foo("string')
> +    >>> f('foo("string')
>       ('foo("string', None, None, 'at 5: unterminated string')
> -    >>> _parsealiasdecl('foo($1, $2, $1)')
> +    >>> f('foo($1, $2, $1)')
>       ('foo', None, None, 'argument names collide with each other')
>       """
>       p = parser.parser(elements)
> -    try:
> -        tree, pos = p.parse(_tokenizealias(decl))
> -        if (pos != len(decl)):
> -            raise error.ParseError(_('invalid token'), pos)
> -        tree = parser.simplifyinfixops(tree, ('list',))
> -    except error.ParseError as inst:
> -        return (decl, None, None, parser.parseerrordetail(inst))
> -
> -    if True:  # XXX to be removed
> -        if tree[0] == 'symbol':
> -            # "name = ...." style
> -            name = tree[1]
> -            if name.startswith('$'):
> -                return (decl, None, None, _("'$' not for alias arguments"))
> -            return (name, tree, None, None)
> -
> -        if tree[0] == 'func' and tree[1][0] == 'symbol':
> -            # "name(arg, ....) = ...." style
> -            name = tree[1][1]
> -            if name.startswith('$'):
> -                return (decl, None, None, _("'$' not for alias arguments"))
> -            args = []
> -            for arg in getlist(tree[2]):
> -                if arg[0] != 'symbol':
> -                    return (decl, None, None, _("invalid argument list"))
> -                args.append(arg[1])
> -            if len(args) != len(set(args)):
> -                return (name, None, None,
> -                        _("argument names collide with each other"))
> -            return (name, tree[:2], args, None)
> -
> -        return (decl, None, None, _("invalid format"))
> +    tree, pos = p.parse(_tokenizealias(decl))
> +    if pos != len(decl):
> +        raise error.ParseError(_('invalid token'), pos)
> +    return parser.simplifyinfixops(tree, ('list',))
>
>   def _relabelaliasargs(tree, args):
>       if not isinstance(tree, tuple):
> @@ -2381,7 +2347,9 @@ class revsetalias(object):
>           h = heads(default)
>           b($1) = ancestors($1) - ancestors(default)
>           '''
> -        self.name, self.tree, self.args, self.error = _parsealiasdecl(name)
> +        rules = parser.aliasrules(_('revset alias'),
> +                                  _parsealiasdecl, None, getlist)
> +        self.name, self.tree, self.args, self.error = rules._builddecl(name)
>           if self.error:
>               self.error = _('failed to parse the declaration of revset alias'
>                              ' "%s": %s') % (self.name, self.error)
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
>
Yuya Nishihara - April 2, 2016, 2:52 a.m.
On Fri, 1 Apr 2016 14:32:41 -0700, Pierre-Yves David wrote:
> On 04/01/2016 08:37 AM, Yuya Nishihara wrote:
> > # HG changeset patch
> > # User Yuya Nishihara <yuya@tcha.org>
> > # Date 1456736043 -32400
> > #      Mon Feb 29 17:54:03 2016 +0900
> > # Node ID 2a3e01c30ddc08a5ff331b8d9b30ac8c014090d6
> > # Parent  ae143c83b4c139422da83eab3c9c3ffb5825a6e8
> > parser: move alias declaration parser to common rule-set class
> >
> > The original _parsealiasdecl() function is split into common _builddecl()
> > and revset-specific _parsealiasdecl(). And the original _parsealiasdecl()
> > call is temporarily replaced by rules._builddecl(), which should be eliminated
> > later.
> >
> > The doctest is kept in revset._parsealiasdecl() because it requires concrete
> > syntax rules.
> >
> > diff --git a/mercurial/parser.py b/mercurial/parser.py
> > --- a/mercurial/parser.py
> > +++ b/mercurial/parser.py
> > @@ -253,3 +253,40 @@ class aliasrules(object):
> >           self._getlist = getlist
> >           self._symbolnode = symbolnode
> >           self._funcnode = funcnode
> > +
> > +    def _builddecl(self, decl):  
> 
> Why is this method private? It seems to be the main entry point for the 
> object right now. Is this temporary?

Yes, temporary. All private calls will be removed by the subsequent patches.

> > +        """Parse an alias declaration into ``(name, tree, args, errorstr)``
> > +
> > +        - ``name``: of declared alias (may be ``decl`` itself at error)
> > +        - ``tree``: parse result (or ``None`` at error)
> > +        - ``args``: list of argument names (or None for symbol declaration)
> > +        - ``errorstr``: detail about detected error (or None)
> > +        """
> > +        try:
> > +            tree = self._parsedecl(decl)  
> 
> So all the actual syntax parsing is done in self._parsedecl, but the
> analysis of that tree is done in this method. We should probably mention it.

Ok, will do in next version.

> Should we have a very simple doctest to validate this?

Hmm, I can split the original doctest to parsing and analysis tests. The latter
can be moved to this function.

Patch

diff --git a/mercurial/parser.py b/mercurial/parser.py
--- a/mercurial/parser.py
+++ b/mercurial/parser.py
@@ -253,3 +253,40 @@  class aliasrules(object):
         self._getlist = getlist
         self._symbolnode = symbolnode
         self._funcnode = funcnode
+
+    def _builddecl(self, decl):
+        """Parse an alias declaration into ``(name, tree, args, errorstr)``
+
+        - ``name``: of declared alias (may be ``decl`` itself at error)
+        - ``tree``: parse result (or ``None`` at error)
+        - ``args``: list of argument names (or None for symbol declaration)
+        - ``errorstr``: detail about detected error (or None)
+        """
+        try:
+            tree = self._parsedecl(decl)
+        except error.ParseError as inst:
+            return (decl, None, None, parseerrordetail(inst))
+
+        if tree[0] == self._symbolnode:
+            # "name = ...." style
+            name = tree[1]
+            if name.startswith('$'):
+                return (decl, None, None, _("'$' not for alias arguments"))
+            return (name, tree, None, None)
+
+        if tree[0] == self._funcnode and tree[1][0] == self._symbolnode:
+            # "name(arg, ....) = ...." style
+            name = tree[1][1]
+            if name.startswith('$'):
+                return (decl, None, None, _("'$' not for alias arguments"))
+            args = []
+            for arg in self._getlist(tree[2]):
+                if arg[0] != self._symbolnode:
+                    return (decl, None, None, _("invalid argument list"))
+                args.append(arg[1])
+            if len(args) != len(set(args)):
+                return (name, None, None,
+                        _("argument names collide with each other"))
+            return (name, tree[:2], args, None)
+
+        return (decl, None, None, _("invalid format"))
diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -2241,75 +2241,41 @@  def _tokenizealias(program):
 def _parsealiasdecl(decl):
     """Parse alias declaration ``decl``
 
-    This returns ``(name, tree, args, errorstr)`` tuple:
-
-    - ``name``: of declared alias (may be ``decl`` itself at error)
-    - ``tree``: parse result (or ``None`` at error)
-    - ``args``: list of alias argument names (or None for symbol declaration)
-    - ``errorstr``: detail about detected error (or None)
-
-    >>> _parsealiasdecl('foo')
+    >>> f = parser.aliasrules('', _parsealiasdecl, None, getlist)._builddecl
+    >>> f('foo')
     ('foo', ('symbol', 'foo'), None, None)
-    >>> _parsealiasdecl('$foo')
+    >>> f('$foo')
     ('$foo', None, None, "'$' not for alias arguments")
-    >>> _parsealiasdecl('foo::bar')
+    >>> f('foo::bar')
     ('foo::bar', None, None, 'invalid format')
-    >>> _parsealiasdecl('foo bar')
+    >>> f('foo bar')
     ('foo bar', None, None, 'at 4: invalid token')
-    >>> _parsealiasdecl('foo()')
+    >>> f('foo()')
     ('foo', ('func', ('symbol', 'foo')), [], None)
-    >>> _parsealiasdecl('$foo()')
+    >>> f('$foo()')
     ('$foo()', None, None, "'$' not for alias arguments")
-    >>> _parsealiasdecl('foo($1, $2)')
+    >>> f('foo($1, $2)')
     ('foo', ('func', ('symbol', 'foo')), ['$1', '$2'], None)
-    >>> _parsealiasdecl('foo(bar_bar, baz.baz)')
+    >>> f('foo(bar_bar, baz.baz)')
     ('foo', ('func', ('symbol', 'foo')), ['bar_bar', 'baz.baz'], None)
-    >>> _parsealiasdecl('foo($1, $2, nested($1, $2))')
+    >>> f('foo($1, $2, nested($1, $2))')
     ('foo($1, $2, nested($1, $2))', None, None, 'invalid argument list')
-    >>> _parsealiasdecl('foo(bar($1, $2))')
+    >>> f('foo(bar($1, $2))')
     ('foo(bar($1, $2))', None, None, 'invalid argument list')
-    >>> _parsealiasdecl('foo("string")')
+    >>> f('foo("string")')
     ('foo("string")', None, None, 'invalid argument list')
-    >>> _parsealiasdecl('foo($1, $2')
+    >>> f('foo($1, $2')
     ('foo($1, $2', None, None, 'at 10: unexpected token: end')
-    >>> _parsealiasdecl('foo("string')
+    >>> f('foo("string')
     ('foo("string', None, None, 'at 5: unterminated string')
-    >>> _parsealiasdecl('foo($1, $2, $1)')
+    >>> f('foo($1, $2, $1)')
     ('foo', None, None, 'argument names collide with each other')
     """
     p = parser.parser(elements)
-    try:
-        tree, pos = p.parse(_tokenizealias(decl))
-        if (pos != len(decl)):
-            raise error.ParseError(_('invalid token'), pos)
-        tree = parser.simplifyinfixops(tree, ('list',))
-    except error.ParseError as inst:
-        return (decl, None, None, parser.parseerrordetail(inst))
-
-    if True:  # XXX to be removed
-        if tree[0] == 'symbol':
-            # "name = ...." style
-            name = tree[1]
-            if name.startswith('$'):
-                return (decl, None, None, _("'$' not for alias arguments"))
-            return (name, tree, None, None)
-
-        if tree[0] == 'func' and tree[1][0] == 'symbol':
-            # "name(arg, ....) = ...." style
-            name = tree[1][1]
-            if name.startswith('$'):
-                return (decl, None, None, _("'$' not for alias arguments"))
-            args = []
-            for arg in getlist(tree[2]):
-                if arg[0] != 'symbol':
-                    return (decl, None, None, _("invalid argument list"))
-                args.append(arg[1])
-            if len(args) != len(set(args)):
-                return (name, None, None,
-                        _("argument names collide with each other"))
-            return (name, tree[:2], args, None)
-
-        return (decl, None, None, _("invalid format"))
+    tree, pos = p.parse(_tokenizealias(decl))
+    if pos != len(decl):
+        raise error.ParseError(_('invalid token'), pos)
+    return parser.simplifyinfixops(tree, ('list',))
 
 def _relabelaliasargs(tree, args):
     if not isinstance(tree, tuple):
@@ -2381,7 +2347,9 @@  class revsetalias(object):
         h = heads(default)
         b($1) = ancestors($1) - ancestors(default)
         '''
-        self.name, self.tree, self.args, self.error = _parsealiasdecl(name)
+        rules = parser.aliasrules(_('revset alias'),
+                                  _parsealiasdecl, None, getlist)
+        self.name, self.tree, self.args, self.error = rules._builddecl(name)
         if self.error:
             self.error = _('failed to parse the declaration of revset alias'
                            ' "%s": %s') % (self.name, self.error)