Patchwork [1,of,5] revset: teach optimize if current expression should define/follow order

login
register
mail settings
Submitter Yuya Nishihara
Date May 30, 2016, 3:11 p.m.
Message ID <a63e5cf3d8c09e8d2265.1464621091@mimosa>
Download mbox | patch
Permalink /patch/15259/
State Superseded
Headers show

Comments

Yuya Nishihara - May 30, 2016, 3:11 p.m.
# HG changeset patch
# User Yuya Nishihara <yuya@tcha.org>
# Date 1455627736 -32400
#      Tue Feb 16 22:02:16 2016 +0900
# Node ID a63e5cf3d8c09e8d2265c252d2f36bbdff18b663
# Parent  e53f961ac75fb73b38b15a92a2a96f3d28206e64
revset: teach optimize if current expression should define/follow order

New flag 'order' will be used to fix a couple of ordering bugs, such as:

 a) 'x & (y | z)' disregards the order of 'x' (issue 5100)
 b) 'x & y:z' is listed from 'y' to 'z'
 c) 'x & y' can be rewritten as 'y & x' if weight(x) > weight(y)

(c) is a hidden bug of optimize(), which could be exposed if we've fixed
(a) and (b) in some way. (a) and (b) are not bugs of optimize(), but they
can be addressed well in optimize() since optimize() knows the context.

'order' is tri-state. It starts with 'define', and shifts to 'follow' by
'x & y'. It changes back to 'define' on function call 'f(x)' or function-like
operation 'x (f) y' because 'f' may have its own ordering requirement for 'x'
and 'y'. The state 'any' will allow us to avoid extra cost that would be
necessary to constrain ordering where it isn't important, 'not x'. For details,
see also the tests added by the next patch.

The initial order will be switched whether an initial set is provided or not:

 m(repo)          # 'define': order is not defined yet
 m(repo, subset)  # 'follow': order should be defined by subset

Patch

diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -2067,6 +2067,24 @@  methods = {
     "parentpost": p1,
 }
 
+# constants for ordering policy, used in _optimize():
+# 1. starts with 'define'
+# 2. shifts to 'follow' by 'x & y'
+# 3. changes back to 'define' on function call 'f(x)' or function-like
+#    operation 'x (f) y' because 'f' may have its own ordering requirement
+#    for 'x' and 'y' (e.g. 'first(x)')
+# 4. 'any' avoids extra cost of ordering where it isn't important, 'not x'
+_anyorder = 'any'  # don't care the order (e.g. 'not x')
+_defineorder = 'define'  # should define the order (e.g. 'x', '_(x)', 'x + y')
+_followorder = 'follow'  # must follow the current order (e.g. '_ & y')
+
+# transition table for 'x & y', from the current expression 'x' to 'y'
+_tofolloworder = {
+    _anyorder: _anyorder,
+    _defineorder: _followorder,
+    _followorder: _followorder,
+}
+
 def _matchonly(revs, bases):
     """
     >>> f = lambda *args: _matchonly(*map(parse, args))
@@ -2082,7 +2100,11 @@  def _matchonly(revs, bases):
         and getstring(bases[1][1], _('not a symbol')) == 'ancestors'):
         return ('list', revs[2], bases[1][2])
 
-def _optimize(x, small):
+def _optimize(x, small, order):
+    """
+    'order' specifies how the current expression 'x' is ordered (see the
+    constants defined above)
+    """
     if x is None:
         return 0, x
 
@@ -2092,30 +2114,32 @@  def _optimize(x, small):
 
     op = x[0]
     if op == 'minus':
-        return _optimize(('and', x[1], ('not', x[2])), small)
+        return _optimize(('and', x[1], ('not', x[2])), small, order)
     elif op == 'only':
         t = ('func', ('symbol', 'only'), ('list', x[1], x[2]))
-        return _optimize(t, small)
+        return _optimize(t, small, order)
     elif op == 'onlypost':
-        return _optimize(('func', ('symbol', 'only'), x[1]), small)
+        return _optimize(('func', ('symbol', 'only'), x[1]), small, order)
     elif op == 'dagrangepre':
-        return _optimize(('func', ('symbol', 'ancestors'), x[1]), small)
+        return _optimize(('func', ('symbol', 'ancestors'), x[1]), small, order)
     elif op == 'dagrangepost':
-        return _optimize(('func', ('symbol', 'descendants'), x[1]), small)
+        t = ('func', ('symbol', 'descendants'), x[1])
+        return _optimize(t, small, order)
     elif op == 'rangeall':
-        return _optimize(('range', ('string', '0'), ('string', 'tip')), small)
+        t = ('range', ('string', '0'), ('string', 'tip'))
+        return _optimize(t, small, order)
     elif op == 'rangepre':
-        return _optimize(('range', ('string', '0'), x[1]), small)
+        return _optimize(('range', ('string', '0'), x[1]), small, order)
     elif op == 'rangepost':
-        return _optimize(('range', x[1], ('string', 'tip')), small)
+        return _optimize(('range', x[1], ('string', 'tip')), small, order)
     elif op == 'negate':
         s = getstring(x[1], _("can't negate that"))
-        return _optimize(('string', '-' + s), small)
+        return _optimize(('string', '-' + s), small, order)
     elif op in 'string symbol negate':
         return smallbonus, x # single revisions are small
     elif op == 'and':
-        wa, ta = _optimize(x[1], True)
-        wb, tb = _optimize(x[2], True)
+        wa, ta = _optimize(x[1], True, order)
+        wb, tb = _optimize(x[2], True, _tofolloworder[order])
         w = min(wa, wb)
 
         # (::x and not ::y)/(not ::y and ::x) have a fast path
@@ -2141,12 +2165,12 @@  def _optimize(x, small):
             else:
                 s = '\0'.join(t[1] for w, t in ss)
                 y = ('func', ('symbol', '_list'), ('string', s))
-                w, t = _optimize(y, False)
+                w, t = _optimize(y, False, order)
             ws.append(w)
             ts.append(t)
             del ss[:]
         for y in x[1:]:
-            w, t = _optimize(y, False)
+            w, t = _optimize(y, False, order)
             if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
                 ss.append((w, t))
                 continue
@@ -2164,34 +2188,34 @@  def _optimize(x, small):
         # Optimize not public() to _notpublic() because we have a fast version
         if x[1] == ('func', ('symbol', 'public'), None):
             newsym = ('func', ('symbol', '_notpublic'), None)
-            o = _optimize(newsym, not small)
+            o = _optimize(newsym, not small, order)
             return o[0], o[1]
         else:
-            o = _optimize(x[1], not small)
+            o = _optimize(x[1], not small, _anyorder)
             return o[0], (op, o[1])
     elif op == 'parentpost':
-        o = _optimize(x[1], small)
+        o = _optimize(x[1], small, _defineorder)
         return o[0], (op, o[1])
     elif op == 'group':
-        return _optimize(x[1], small)
+        return _optimize(x[1], small, order)
     elif op in 'dagrange range parent ancestorspec':
         if op == 'parent':
             # x^:y means (x^) : y, not x ^ (:y)
             post = ('parentpost', x[1])
             if x[2][0] == 'dagrangepre':
-                return _optimize(('dagrange', post, x[2][1]), small)
+                return _optimize(('dagrange', post, x[2][1]), small, order)
             elif x[2][0] == 'rangepre':
-                return _optimize(('range', post, x[2][1]), small)
-
-        wa, ta = _optimize(x[1], small)
-        wb, tb = _optimize(x[2], small)
+                return _optimize(('range', post, x[2][1]), small, order)
+
+        wa, ta = _optimize(x[1], small, _defineorder)
+        wb, tb = _optimize(x[2], small, _defineorder)
         return wa + wb, (op, ta, tb)
     elif op == 'list':
-        ws, ts = zip(*(_optimize(y, small) for y in x[1:]))
+        ws, ts = zip(*(_optimize(y, small, order) for y in x[1:]))
         return sum(ws), (op,) + ts
     elif op == 'func':
         f = getstring(x[1], _("not a symbol"))
-        wa, ta = _optimize(x[2], small)
+        wa, ta = _optimize(x[2], small, _defineorder)
         if f in ("author branch closed date desc file grep keyword "
                  "outgoing user"):
             w = 10 # slow
@@ -2211,7 +2235,7 @@  def _optimize(x, small):
     return 1, x
 
 def optimize(tree):
-    _weight, newtree = _optimize(tree, small=True)
+    _weight, newtree = _optimize(tree, small=True, order=_defineorder)
     return newtree
 
 # the set of valid characters for the initial letter of symbols in