Patchwork [4,of,5] revsetlang: match tree by helper function on optimize

login
register
mail settings
Submitter Yuya Nishihara
Date Aug. 31, 2017, 2:42 p.m.
Message ID <67ed9cb475e3b952572b.1504190533@mimosa>
Download mbox | patch
Permalink /patch/23537/
State Accepted
Headers show

Comments

Yuya Nishihara - Aug. 31, 2017, 2:42 p.m.
# HG changeset patch
# User Yuya Nishihara <yuya@tcha.org>
# Date 1455712859 -32400
#      Wed Feb 17 21:40:59 2016 +0900
# Node ID 67ed9cb475e3b952572b9bc46e2ed56f0474f381
# Parent  af0821ec50deb2b65db7e0fecef6b0d379256533
revsetlang: match tree by helper function on optimize

This should make optimize() more readable and less error-prone, but it doubles
the parsing cost.

  (original)
  $ python -m timeit -n10000 -s 'from mercurial import revsetlang as L' \
  'L.optimize(L.analyze(L.parse("ancestors(x) and not ancestors(y)")))'
  10000 loops, best of 3: 79.3 usec per loop

  (this patch)
  $ python -m timeit -n10000 -s 'from mercurial import revsetlang as L' \
  'L._treecache.clear(); \
   L.optimize(L.analyze(L.parse("ancestors(x) and not ancestors(y)")))'
  10000 loops, best of 3: 201 usec per loop

Patch

diff --git a/mercurial/revsetlang.py b/mercurial/revsetlang.py
--- a/mercurial/revsetlang.py
+++ b/mercurial/revsetlang.py
@@ -258,6 +258,18 @@  def _build(tmplspec, *repls):
     template = _cachedtree(tmplspec)
     return parser.buildtree(template, ('symbol', '_'), *repls)
 
+def _match(patspec, tree):
+    """Test if a tree matches the given pattern statement; return the matches
+
+    >>> _match('f(_)', parse('f()'))
+    >>> _match('f(_)', parse('f(1)'))
+    [('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]
+    >>> _match('f(_)', parse('f(1, 2)'))
+    """
+    pattern = _cachedtree(patspec)
+    return parser.matchtree(pattern, tree, ('symbol', '_'),
+                            {'keyvalue', 'list'})
+
 def _isnamedfunc(x, funcname):
     """Check if given tree matches named function"""
     return x and x[0] == 'func' and getsymbol(x[1]) == funcname
@@ -278,15 +290,7 @@  def _matchnamedfunc(x, funcname):
     return x[2]
 
 def _matchonly(revs, bases):
-    """
-    >>> f = lambda *args: _matchonly(*map(parse, args))
-    >>> f('ancestors(A)', 'not ancestors(B)')
-    ('list', ('symbol', 'A'), ('symbol', 'B'))
-    """
-    ta = _matchnamedfunc(revs, 'ancestors')
-    tb = bases and bases[0] == 'not' and _matchnamedfunc(bases[1], 'ancestors')
-    if _isposargs(ta, 1) and _isposargs(tb, 1):
-        return ('list', ta, tb)
+    return _match('ancestors(_) and not ancestors(_)', ('and', revs, bases))
 
 def _fixops(x):
     """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
@@ -389,8 +393,9 @@  def _optimize(x, small):
         if m:
             return w, _build('only(_, _)', *m[1:])
 
-        if tb is not None and tb[0] == 'not':
-            return wa, ('difference', ta, tb[1])
+        m = _match('not _', tb)
+        if m:
+            return wa, ('difference', ta, m[1])
         if wa > wb:
             op = 'andsmally'
         return w, (op, ta, tb)
@@ -424,7 +429,7 @@  def _optimize(x, small):
         return max(ws), (op, ('list',) + tuple(ts))
     elif op == 'not':
         # Optimize not public() to _notpublic() because we have a fast version
-        if x[1][:3] == ('func', ('symbol', 'public'), None):
+        if _match('public()', x[1]):
             o = _optimize(_build('_notpublic()'), not small)
             return o[0], o[1]
         else: