Patchwork [3,of,7,V2] revset: insert _reorder to fix ordering of inner OR and some functions (BC)

login
register
mail settings
Submitter Yuya Nishihara
Date June 3, 2016, 3:02 p.m.
Message ID <cdf3cd369a4e4f114b2a.1464966146@mimosa>
Download mbox | patch
Permalink /patch/15374/
State Changes Requested
Delegated to: Augie Fackler
Headers show

Comments

Yuya Nishihara - June 3, 2016, 3:02 p.m.
# HG changeset patch
# User Yuya Nishihara <yuya@tcha.org>
# Date 1455629461 -32400
#      Tue Feb 16 22:31:01 2016 +0900
# Node ID cdf3cd369a4e4f114b2ab555de1d3b30c8c3827a
# Parent  f3c0deae1fd126d7340929c0c0612b88a4f5aa0e
revset: insert _reorder to fix ordering of inner OR and some functions (BC)

This fixes the order of 'x & (y | z)' and 'x & _list(...)' by inserting
'_reorder' function. Still 'x & y:z', 'x & reverse(y)' and 'x & sort(y)'
have the ordering issue, which should be fixed by subsequent patches.

Originally I was going to add a property to @predicate to mark a function,
but that seems not ideal because most functions should not have such property,
and we will need a different optimization trick per function.

Patch

diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -1153,6 +1153,7 @@  def head(repo, subset, x):
     # This does not break because of other fullreposet misbehavior.
     # XXX We should combine with subset first: 'subset & baseset(...)'. This is
     # necessary to ensure we preserve the order in subset.
+    # Remove 'head' from _orderingsymbols if you've fixed the issue.
     return baseset(hs) & subset
 
 @predicate('heads(set)', safe=True)
@@ -1685,6 +1686,13 @@  def removes(repo, subset, x):
     pat = getstring(x, _("removes requires a pattern"))
     return checkstatus(repo, subset, pat, 2)
 
+@predicate('_reorder(set)', safe=True)
+def _reorder(repo, subset, x):
+    # Evaluate right-hand-side expression 'set' and reorder the result set
+    # according to the left-hand-side set 'subset'
+    rs = getset(repo, subset, x)
+    return subset & rs
+
 @predicate('rev(number)', safe=True)
 def rev(repo, subset, x):
     """Revision with the given numeric identifier.
@@ -2067,6 +2075,18 @@  methods = {
     "parentpost": p1,
 }
 
+# symbols which have own ordering rules and disregard the order of input set.
+# few symbols should be listed. almost all symbols should follow the input
+# order (i.e. they should do 'subset & x' or 'subset.filter(x)'.)
+_orderingsymbols = set([
+    'head',
+    'reverse',
+    'sort',
+    '_list',
+    '_intlist',
+    '_hexlist',
+])
+
 # constants for ordering policy, used in _optimize():
 # 1. starts with 'define'
 # 2. shifts to 'follow' by 'x & y'
@@ -2085,6 +2105,11 @@  methods = {
     _followorder: _followorder,
 }
 
+def _fixfolloworder(x, order):
+    if order == _followorder:
+        return ('func', ('symbol', '_reorder'), x)
+    return x
+
 def _matchonly(revs, bases):
     """
     >>> f = lambda *args: _matchonly(*map(parse, args))
@@ -2183,7 +2208,7 @@  def _optimize(x, small, order):
         # we can't reorder trees by weight because it would change the order.
         # ("sort(a + b)" == "sort(b + a)", but "a + b" != "b + a")
         #   ts = tuple(t for w, t in sorted(zip(ws, ts), key=lambda wt: wt[0]))
-        return max(ws), (op,) + tuple(ts)
+        return max(ws), _fixfolloworder((op,) + tuple(ts), order)
     elif op == 'not':
         # Optimize not public() to _notpublic() because we have a fast version
         if x[1] == ('func', ('symbol', 'public'), None):
@@ -2231,7 +2256,10 @@  def _optimize(x, small, order):
             w = 10 # assume most sorts look at changelog
         else:
             w = 1
-        return w + wa, (op, x[1], ta)
+        t = (op, x[1], ta)
+        if f in _orderingsymbols:
+            t = _fixfolloworder(t, order)
+        return w + wa, t
     return 1, x
 
 def optimize(tree):
diff --git a/tests/test-revset.t b/tests/test-revset.t
--- a/tests/test-revset.t
+++ b/tests/test-revset.t
@@ -908,6 +908,376 @@  Test order of revisions in compound expr
   1
   0
 
+  $ log '2:0 & head()'
+  2
+  1
+  0
+
+ '_reorder' should be inserted where necessary:
+
+  $ try --optimize '2:0 & (0 + 1 + 2)'
+  (and
+    (range
+      ('symbol', '2')
+      ('symbol', '0'))
+    (group
+      (or
+        ('symbol', '0')
+        ('symbol', '1')
+        ('symbol', '2'))))
+  * optimized:
+  (and
+    (range
+      ('symbol', '2')
+      ('symbol', '0'))
+    (func
+      ('symbol', '_reorder')
+      (func
+        ('symbol', '_list')
+        ('string', '0\x001\x002'))))
+  * set:
+  <filteredset
+    <spanset- 0:2>,
+    <baseset [0, 1, 2]>>
+  2
+  1
+  0
+
+  $ try --optimize '2:0 & (0:1 + 2)'
+  (and
+    (range
+      ('symbol', '2')
+      ('symbol', '0'))
+    (group
+      (or
+        (range
+          ('symbol', '0')
+          ('symbol', '1'))
+        ('symbol', '2'))))
+  * optimized:
+  (and
+    (range
+      ('symbol', '2')
+      ('symbol', '0'))
+    (func
+      ('symbol', '_reorder')
+      (or
+        (range
+          ('symbol', '0')
+          ('symbol', '1'))
+        ('symbol', '2'))))
+  * set:
+  <filteredset
+    <spanset- 0:2>,
+    <addset
+      <filteredset
+        <spanset+ 0:1>,
+        <spanset- 0:2>>,
+      <baseset [2]>>>
+  2
+  1
+  0
+
+ '_reorder' should be omitted if order doesn't matter:
+
+  $ try --optimize '2:0 & not (0 + 1)'
+  (and
+    (range
+      ('symbol', '2')
+      ('symbol', '0'))
+    (not
+      (group
+        (or
+          ('symbol', '0')
+          ('symbol', '1')))))
+  * optimized:
+  (difference
+    (range
+      ('symbol', '2')
+      ('symbol', '0'))
+    (func
+      ('symbol', '_list')
+      ('string', '0\x001')))
+  * set:
+  <filteredset
+    <spanset- 0:2>,
+    <not
+      <baseset [0, 1]>>>
+  2
+
+  $ try --optimize '2:0 & not (0:2 & (0 + 1))'
+  (and
+    (range
+      ('symbol', '2')
+      ('symbol', '0'))
+    (not
+      (group
+        (and
+          (range
+            ('symbol', '0')
+            ('symbol', '2'))
+          (group
+            (or
+              ('symbol', '0')
+              ('symbol', '1')))))))
+  * optimized:
+  (difference
+    (range
+      ('symbol', '2')
+      ('symbol', '0'))
+    (and
+      (range
+        ('symbol', '0')
+        ('symbol', '2'))
+      (func
+        ('symbol', '_list')
+        ('string', '0\x001'))))
+  * set:
+  <filteredset
+    <spanset- 0:2>,
+    <not
+      <baseset [0, 1]>>>
+  2
+
+ BROKEN: reverse() and sort() will need different workaround since they
+ modify the flag of input set:
+
+  $ try --optimize '0:2 & reverse(all())'
+  (and
+    (range
+      ('symbol', '0')
+      ('symbol', '2'))
+    (func
+      ('symbol', 'reverse')
+      (func
+        ('symbol', 'all')
+        None)))
+  * optimized:
+  (and
+    (range
+      ('symbol', '0')
+      ('symbol', '2'))
+    (func
+      ('symbol', '_reorder')
+      (func
+        ('symbol', 'reverse')
+        (func
+          ('symbol', 'all')
+          None))))
+  * set:
+  <filteredset
+    <spanset- 0:2>,
+    <filteredset
+      <spanset- 0:2>,
+      <spanset+ 0:9>>>
+  2
+  1
+  0
+
+  $ try --optimize '0:2 & sort(all(), -rev)'
+  (and
+    (range
+      ('symbol', '0')
+      ('symbol', '2'))
+    (func
+      ('symbol', 'sort')
+      (list
+        (func
+          ('symbol', 'all')
+          None)
+        (negate
+          ('symbol', 'rev')))))
+  * optimized:
+  (and
+    (range
+      ('symbol', '0')
+      ('symbol', '2'))
+    (func
+      ('symbol', '_reorder')
+      (func
+        ('symbol', 'sort')
+        (list
+          (func
+            ('symbol', 'all')
+            None)
+          ('string', '-rev')))))
+  * set:
+  <filteredset
+    <spanset- 0:2>,
+    <filteredset
+      <spanset- 0:2>,
+      <spanset+ 0:9>>>
+  2
+  1
+  0
+
+ for 'A & f(B)', 'B' should not be affected by the order of 'A':
+
+  $ try --optimize '2:0 & first(1 + 0 + 2)'
+  (and
+    (range
+      ('symbol', '2')
+      ('symbol', '0'))
+    (func
+      ('symbol', 'first')
+      (or
+        ('symbol', '1')
+        ('symbol', '0')
+        ('symbol', '2'))))
+  * optimized:
+  (and
+    (range
+      ('symbol', '2')
+      ('symbol', '0'))
+    (func
+      ('symbol', 'first')
+      (func
+        ('symbol', '_list')
+        ('string', '1\x000\x002'))))
+  * set:
+  <baseset
+    <limit n=1, offset=0,
+      <spanset- 0:2>,
+      <baseset [1, 0, 2]>>>
+  1
+
+  $ try --optimize '2:0 & not last(0 + 2 + 1)'
+  (and
+    (range
+      ('symbol', '2')
+      ('symbol', '0'))
+    (not
+      (func
+        ('symbol', 'last')
+        (or
+          ('symbol', '0')
+          ('symbol', '2')
+          ('symbol', '1')))))
+  * optimized:
+  (difference
+    (range
+      ('symbol', '2')
+      ('symbol', '0'))
+    (func
+      ('symbol', 'last')
+      (func
+        ('symbol', '_list')
+        ('string', '0\x002\x001'))))
+  * set:
+  <filteredset
+    <spanset- 0:2>,
+    <not
+      <baseset
+        <last n=1,
+          <fullreposet+ 0:9>,
+          <baseset [1, 2, 0]>>>>>
+  2
+  0
+
+ for 'A & (op)(B)', 'B' should not be affected by the order of 'A':
+
+  $ try --optimize '2:0 & (1 + 0 + 2):(0 + 2 + 1)'
+  (and
+    (range
+      ('symbol', '2')
+      ('symbol', '0'))
+    (range
+      (group
+        (or
+          ('symbol', '1')
+          ('symbol', '0')
+          ('symbol', '2')))
+      (group
+        (or
+          ('symbol', '0')
+          ('symbol', '2')
+          ('symbol', '1')))))
+  * optimized:
+  (and
+    (range
+      ('symbol', '2')
+      ('symbol', '0'))
+    (range
+      (func
+        ('symbol', '_list')
+        ('string', '1\x000\x002'))
+      (func
+        ('symbol', '_list')
+        ('string', '0\x002\x001'))))
+  * set:
+  <filteredset
+    <baseset [1]>,
+    <spanset- 0:2>>
+  1
+
+ 'A & B' can be rewritten as 'B & A' by weight, but that's fine as long as
+ the ordering policy is determined before the rewrite; in this example,
+ 'B' follows the order of the initial set, which is the same order as 'A'
+ since 'A' also follows the order:
+
+  $ try --optimize 'contains("glob:*") & (2 + 0 + 1)'
+  (and
+    (func
+      ('symbol', 'contains')
+      ('string', 'glob:*'))
+    (group
+      (or
+        ('symbol', '2')
+        ('symbol', '0')
+        ('symbol', '1'))))
+  * optimized:
+  (and
+    (func
+      ('symbol', '_reorder')
+      (func
+        ('symbol', '_list')
+        ('string', '2\x000\x001')))
+    (func
+      ('symbol', 'contains')
+      ('string', 'glob:*')))
+  * set:
+  <filteredset
+    <baseset+ [0, 1, 2]>,
+    <contains 'glob:*'>>
+  0
+  1
+  2
+
+ and in this example, 'A & B' is rewritten as 'B & A', but 'A' overrides
+ the order appropriately:
+
+  $ try --optimize 'reverse(contains("glob:*")) & (0 + 2 + 1)'
+  (and
+    (func
+      ('symbol', 'reverse')
+      (func
+        ('symbol', 'contains')
+        ('string', 'glob:*')))
+    (group
+      (or
+        ('symbol', '0')
+        ('symbol', '2')
+        ('symbol', '1'))))
+  * optimized:
+  (and
+    (func
+      ('symbol', '_reorder')
+      (func
+        ('symbol', '_list')
+        ('string', '0\x002\x001')))
+    (func
+      ('symbol', 'reverse')
+      (func
+        ('symbol', 'contains')
+        ('string', 'glob:*'))))
+  * set:
+  <filteredset
+    <baseset- [0, 1, 2]>,
+    <contains 'glob:*'>>
+  2
+  1
+  0
+
 test sort revset
 --------------------------------------------