Patchwork [2,of,7,V4] revset: teach optimize if current expression should define/follow order

login
register
mail settings
Submitter Yuya Nishihara
Date June 23, 2016, 3:59 p.m.
Message ID <0a1b1af8c9b8aaa0690d.1466697594@mimosa>
Download mbox | patch
Permalink /patch/15580/
State Accepted
Headers show

Comments

Yuya Nishihara - June 23, 2016, 3:59 p.m.
# HG changeset patch
# User Yuya Nishihara <yuya@tcha.org>
# Date 1455627736 -32400
#      Tue Feb 16 22:02:16 2016 +0900
# Node ID 0a1b1af8c9b8aaa0690de2fc0614b531dc03a5c2
# Parent  8ae1910bc804846ea065854aea7896e3c46cde05
revset: teach optimize if current expression should define/follow order

New flag 'order' is the hint to determine if a function or operation can
enforce its ordering requirement or take the ordering already defined. It
will be used to fix a couple of ordering bugs, such as:

 a) 'x & (y | z)' disregards the order of 'x' (issue5100)
 b) 'x & y:z' is listed from 'y' to 'z'
 c) 'x & y' can be rewritten as 'y & x' if weight(x) > weight(y)

(a) and (b) are bugs of the revset core. Before this, there was no way to
tell if 'orset()' and 'rangeset()' can enforce its ordering. These bugs
could be addressed by overriding __and__() of the initial set to take the
ordering of the other set:

    class initialset(fullreposet):
        def __and__(self, other):
            # allow other to enforce its ordering
            return other & self

but it would expose (c), which is a hidden bug of optimize(). So either ways,
optimize() have to know the current ordering requirement. Otherwise, it
couldn't rewrite expressions by weights with no output change, nor tell how
a revset function or operation should order the entries.

'order' is tri-state. It starts with 'define', and shifts to 'follow' by
'x & y'. It changes back to 'define' on function call 'f(x)' or function-like
operation 'x (f) y' because 'f' may have its own ordering requirement for 'x'
and 'y'. The state 'any' will allow us to avoid extra cost that would be
necessary to constrain ordering where it isn't important, 'not x'.

Changes from V3: I've made these constants public, which will be used to
specify how matcher will take the order of the input set:

    # getlogrevs()
    matcher = revset.match(repo.ui, expr, order=revset.followorder)
    revs = matcher(repo, revs)

Patch

diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -2302,6 +2302,50 @@  methods = {
     "parentpost": p1,
 }
 
+# Constants for ordering requirement, used in _optimize():
+#
+# If 'define', any nested functions and operations can change the ordering of
+# the entries in the set. If 'follow', any nested functions and operations
+# should take the ordering specified by the first operand to the '&' operator.
+#
+# For instance,
+#
+#   X & (Y | Z)
+#   ^   ^^^^^^^
+#   |   follow
+#   define
+#
+# will be evaluated as 'or(y(x()), z(x()))', where 'x()' can change the order
+# of the entries in the set, but 'y()', 'z()' and 'or()' shouldn't.
+#
+# 'any' means the order doesn't matter. For instance,
+#
+#   X & !Y
+#        ^
+#        any
+#
+# 'y()' can either enforce its ordering requirement or take the ordering
+# specified by 'x()' because 'not()' doesn't care the order.
+#
+# Transition of ordering requirement:
+#
+# 1. starts with 'define'
+# 2. shifts to 'follow' by 'x & y'
+# 3. changes back to 'define' on function call 'f(x)' or function-like
+#    operation 'x (f) y' because 'f' may have its own ordering requirement
+#    for 'x' and 'y' (e.g. 'first(x)')
+#
+anyorder = 'any'        # don't care the order
+defineorder = 'define'  # should define the order
+followorder = 'follow'  # must follow the current order
+
+# transition table for 'x & y', from the current expression 'x' to 'y'
+_tofolloworder = {
+    anyorder: anyorder,
+    defineorder: followorder,
+    followorder: followorder,
+}
+
 def _matchonly(revs, bases):
     """
     >>> f = lambda *args: _matchonly(*map(parse, args))
@@ -2317,7 +2361,11 @@  def _matchonly(revs, bases):
         and getstring(bases[1][1], _('not a symbol')) == 'ancestors'):
         return ('list', revs[2], bases[1][2])
 
-def _optimize(x, small):
+def _optimize(x, small, order):
+    """
+    'order' specifies how the current expression 'x' is ordered (see the
+    constants defined above)
+    """
     if x is None:
         return 0, x
 
@@ -2327,30 +2375,32 @@  def _optimize(x, small):
 
     op = x[0]
     if op == 'minus':
-        return _optimize(('and', x[1], ('not', x[2])), small)
+        return _optimize(('and', x[1], ('not', x[2])), small, order)
     elif op == 'only':
         t = ('func', ('symbol', 'only'), ('list', x[1], x[2]))
-        return _optimize(t, small)
+        return _optimize(t, small, order)
     elif op == 'onlypost':
-        return _optimize(('func', ('symbol', 'only'), x[1]), small)
+        return _optimize(('func', ('symbol', 'only'), x[1]), small, order)
     elif op == 'dagrangepre':
-        return _optimize(('func', ('symbol', 'ancestors'), x[1]), small)
+        return _optimize(('func', ('symbol', 'ancestors'), x[1]), small, order)
     elif op == 'dagrangepost':
-        return _optimize(('func', ('symbol', 'descendants'), x[1]), small)
+        t = ('func', ('symbol', 'descendants'), x[1])
+        return _optimize(t, small, order)
     elif op == 'rangeall':
-        return _optimize(('range', ('string', '0'), ('string', 'tip')), small)
+        t = ('range', ('string', '0'), ('string', 'tip'))
+        return _optimize(t, small, order)
     elif op == 'rangepre':
-        return _optimize(('range', ('string', '0'), x[1]), small)
+        return _optimize(('range', ('string', '0'), x[1]), small, order)
     elif op == 'rangepost':
-        return _optimize(('range', x[1], ('string', 'tip')), small)
+        return _optimize(('range', x[1], ('string', 'tip')), small, order)
     elif op == 'negate':
         s = getstring(x[1], _("can't negate that"))
-        return _optimize(('string', '-' + s), small)
+        return _optimize(('string', '-' + s), small, order)
     elif op in 'string symbol negate':
         return smallbonus, x # single revisions are small
     elif op == 'and':
-        wa, ta = _optimize(x[1], True)
-        wb, tb = _optimize(x[2], True)
+        wa, ta = _optimize(x[1], True, order)
+        wb, tb = _optimize(x[2], True, _tofolloworder[order])
         w = min(wa, wb)
 
         # (::x and not ::y)/(not ::y and ::x) have a fast path
@@ -2376,12 +2426,12 @@  def _optimize(x, small):
             else:
                 s = '\0'.join(t[1] for w, t in ss)
                 y = ('func', ('symbol', '_list'), ('string', s))
-                w, t = _optimize(y, False)
+                w, t = _optimize(y, False, order)
             ws.append(w)
             ts.append(t)
             del ss[:]
         for y in x[1:]:
-            w, t = _optimize(y, False)
+            w, t = _optimize(y, False, order)
             if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
                 ss.append((w, t))
                 continue
@@ -2399,34 +2449,34 @@  def _optimize(x, small):
         # Optimize not public() to _notpublic() because we have a fast version
         if x[1] == ('func', ('symbol', 'public'), None):
             newsym = ('func', ('symbol', '_notpublic'), None)
-            o = _optimize(newsym, not small)
+            o = _optimize(newsym, not small, order)
             return o[0], o[1]
         else:
-            o = _optimize(x[1], not small)
+            o = _optimize(x[1], not small, anyorder)
             return o[0], (op, o[1])
     elif op == 'parentpost':
-        o = _optimize(x[1], small)
+        o = _optimize(x[1], small, defineorder)
         return o[0], (op, o[1])
     elif op == 'group':
-        return _optimize(x[1], small)
+        return _optimize(x[1], small, order)
     elif op in 'dagrange range parent ancestorspec':
         if op == 'parent':
             # x^:y means (x^) : y, not x ^ (:y)
             post = ('parentpost', x[1])
             if x[2][0] == 'dagrangepre':
-                return _optimize(('dagrange', post, x[2][1]), small)
+                return _optimize(('dagrange', post, x[2][1]), small, order)
             elif x[2][0] == 'rangepre':
-                return _optimize(('range', post, x[2][1]), small)
-
-        wa, ta = _optimize(x[1], small)
-        wb, tb = _optimize(x[2], small)
+                return _optimize(('range', post, x[2][1]), small, order)
+
+        wa, ta = _optimize(x[1], small, defineorder)
+        wb, tb = _optimize(x[2], small, defineorder)
         return wa + wb, (op, ta, tb)
     elif op == 'list':
-        ws, ts = zip(*(_optimize(y, small) for y in x[1:]))
+        ws, ts = zip(*(_optimize(y, small, order) for y in x[1:]))
         return sum(ws), (op,) + ts
     elif op == 'func':
         f = getstring(x[1], _("not a symbol"))
-        wa, ta = _optimize(x[2], small)
+        wa, ta = _optimize(x[2], small, defineorder)
         if f in ("author branch closed date desc file grep keyword "
                  "outgoing user"):
             w = 10 # slow
@@ -2446,7 +2496,7 @@  def _optimize(x, small):
     return 1, x
 
 def optimize(tree):
-    _weight, newtree = _optimize(tree, small=True)
+    _weight, newtree = _optimize(tree, small=True, order=defineorder)
     return newtree
 
 # the set of valid characters for the initial letter of symbols in