From patchwork Fri Jun 3 15:02:26 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [3, of, 7, V2] revset: insert _reorder to fix ordering of inner OR and some functions (BC) From: Yuya Nishihara X-Patchwork-Id: 15374 Message-Id: To: mercurial-devel@mercurial-scm.org Date: Sat, 04 Jun 2016 00:02:26 +0900 # HG changeset patch # User Yuya Nishihara # Date 1455629461 -32400 # Tue Feb 16 22:31:01 2016 +0900 # Node ID cdf3cd369a4e4f114b2ab555de1d3b30c8c3827a # Parent f3c0deae1fd126d7340929c0c0612b88a4f5aa0e revset: insert _reorder to fix ordering of inner OR and some functions (BC) This fixes the order of 'x & (y | z)' and 'x & _list(...)' by inserting '_reorder' function. Still 'x & y:z', 'x & reverse(y)' and 'x & sort(y)' have the ordering issue, which should be fixed by subsequent patches. Originally I was going to add a property to @predicate to mark a function, but that seems not ideal because most functions should not have such property, and we will need a different optimization trick per function. diff --git a/mercurial/revset.py b/mercurial/revset.py --- a/mercurial/revset.py +++ b/mercurial/revset.py @@ -1153,6 +1153,7 @@ def head(repo, subset, x): # This does not break because of other fullreposet misbehavior. # XXX We should combine with subset first: 'subset & baseset(...)'. This is # necessary to ensure we preserve the order in subset. + # Remove 'head' from _orderingsymbols if you've fixed the issue. return baseset(hs) & subset @predicate('heads(set)', safe=True) @@ -1685,6 +1686,13 @@ def removes(repo, subset, x): pat = getstring(x, _("removes requires a pattern")) return checkstatus(repo, subset, pat, 2) +@predicate('_reorder(set)', safe=True) +def _reorder(repo, subset, x): + # Evaluate right-hand-side expression 'set' and reorder the result set + # according to the left-hand-side set 'subset' + rs = getset(repo, subset, x) + return subset & rs + @predicate('rev(number)', safe=True) def rev(repo, subset, x): """Revision with the given numeric identifier. @@ -2067,6 +2075,18 @@ methods = { "parentpost": p1, } +# symbols which have own ordering rules and disregard the order of input set. +# few symbols should be listed. almost all symbols should follow the input +# order (i.e. they should do 'subset & x' or 'subset.filter(x)'.) +_orderingsymbols = set([ + 'head', + 'reverse', + 'sort', + '_list', + '_intlist', + '_hexlist', +]) + # constants for ordering policy, used in _optimize(): # 1. starts with 'define' # 2. shifts to 'follow' by 'x & y' @@ -2085,6 +2105,11 @@ methods = { _followorder: _followorder, } +def _fixfolloworder(x, order): + if order == _followorder: + return ('func', ('symbol', '_reorder'), x) + return x + def _matchonly(revs, bases): """ >>> f = lambda *args: _matchonly(*map(parse, args)) @@ -2183,7 +2208,7 @@ def _optimize(x, small, order): # we can't reorder trees by weight because it would change the order. # ("sort(a + b)" == "sort(b + a)", but "a + b" != "b + a") # ts = tuple(t for w, t in sorted(zip(ws, ts), key=lambda wt: wt[0])) - return max(ws), (op,) + tuple(ts) + return max(ws), _fixfolloworder((op,) + tuple(ts), order) elif op == 'not': # Optimize not public() to _notpublic() because we have a fast version if x[1] == ('func', ('symbol', 'public'), None): @@ -2231,7 +2256,10 @@ def _optimize(x, small, order): w = 10 # assume most sorts look at changelog else: w = 1 - return w + wa, (op, x[1], ta) + t = (op, x[1], ta) + if f in _orderingsymbols: + t = _fixfolloworder(t, order) + return w + wa, t return 1, x def optimize(tree): diff --git a/tests/test-revset.t b/tests/test-revset.t --- a/tests/test-revset.t +++ b/tests/test-revset.t @@ -908,6 +908,376 @@ Test order of revisions in compound expr 1 0 + $ log '2:0 & head()' + 2 + 1 + 0 + + '_reorder' should be inserted where necessary: + + $ try --optimize '2:0 & (0 + 1 + 2)' + (and + (range + ('symbol', '2') + ('symbol', '0')) + (group + (or + ('symbol', '0') + ('symbol', '1') + ('symbol', '2')))) + * optimized: + (and + (range + ('symbol', '2') + ('symbol', '0')) + (func + ('symbol', '_reorder') + (func + ('symbol', '_list') + ('string', '0\x001\x002')))) + * set: + , + > + 2 + 1 + 0 + + $ try --optimize '2:0 & (0:1 + 2)' + (and + (range + ('symbol', '2') + ('symbol', '0')) + (group + (or + (range + ('symbol', '0') + ('symbol', '1')) + ('symbol', '2')))) + * optimized: + (and + (range + ('symbol', '2') + ('symbol', '0')) + (func + ('symbol', '_reorder') + (or + (range + ('symbol', '0') + ('symbol', '1')) + ('symbol', '2')))) + * set: + , + , + >, + >> + 2 + 1 + 0 + + '_reorder' should be omitted if order doesn't matter: + + $ try --optimize '2:0 & not (0 + 1)' + (and + (range + ('symbol', '2') + ('symbol', '0')) + (not + (group + (or + ('symbol', '0') + ('symbol', '1'))))) + * optimized: + (difference + (range + ('symbol', '2') + ('symbol', '0')) + (func + ('symbol', '_list') + ('string', '0\x001'))) + * set: + , + >> + 2 + + $ try --optimize '2:0 & not (0:2 & (0 + 1))' + (and + (range + ('symbol', '2') + ('symbol', '0')) + (not + (group + (and + (range + ('symbol', '0') + ('symbol', '2')) + (group + (or + ('symbol', '0') + ('symbol', '1'))))))) + * optimized: + (difference + (range + ('symbol', '2') + ('symbol', '0')) + (and + (range + ('symbol', '0') + ('symbol', '2')) + (func + ('symbol', '_list') + ('string', '0\x001')))) + * set: + , + >> + 2 + + BROKEN: reverse() and sort() will need different workaround since they + modify the flag of input set: + + $ try --optimize '0:2 & reverse(all())' + (and + (range + ('symbol', '0') + ('symbol', '2')) + (func + ('symbol', 'reverse') + (func + ('symbol', 'all') + None))) + * optimized: + (and + (range + ('symbol', '0') + ('symbol', '2')) + (func + ('symbol', '_reorder') + (func + ('symbol', 'reverse') + (func + ('symbol', 'all') + None)))) + * set: + , + , + >> + 2 + 1 + 0 + + $ try --optimize '0:2 & sort(all(), -rev)' + (and + (range + ('symbol', '0') + ('symbol', '2')) + (func + ('symbol', 'sort') + (list + (func + ('symbol', 'all') + None) + (negate + ('symbol', 'rev'))))) + * optimized: + (and + (range + ('symbol', '0') + ('symbol', '2')) + (func + ('symbol', '_reorder') + (func + ('symbol', 'sort') + (list + (func + ('symbol', 'all') + None) + ('string', '-rev'))))) + * set: + , + , + >> + 2 + 1 + 0 + + for 'A & f(B)', 'B' should not be affected by the order of 'A': + + $ try --optimize '2:0 & first(1 + 0 + 2)' + (and + (range + ('symbol', '2') + ('symbol', '0')) + (func + ('symbol', 'first') + (or + ('symbol', '1') + ('symbol', '0') + ('symbol', '2')))) + * optimized: + (and + (range + ('symbol', '2') + ('symbol', '0')) + (func + ('symbol', 'first') + (func + ('symbol', '_list') + ('string', '1\x000\x002')))) + * set: + , + >> + 1 + + $ try --optimize '2:0 & not last(0 + 2 + 1)' + (and + (range + ('symbol', '2') + ('symbol', '0')) + (not + (func + ('symbol', 'last') + (or + ('symbol', '0') + ('symbol', '2') + ('symbol', '1'))))) + * optimized: + (difference + (range + ('symbol', '2') + ('symbol', '0')) + (func + ('symbol', 'last') + (func + ('symbol', '_list') + ('string', '0\x002\x001')))) + * set: + , + , + >>>> + 2 + 0 + + for 'A & (op)(B)', 'B' should not be affected by the order of 'A': + + $ try --optimize '2:0 & (1 + 0 + 2):(0 + 2 + 1)' + (and + (range + ('symbol', '2') + ('symbol', '0')) + (range + (group + (or + ('symbol', '1') + ('symbol', '0') + ('symbol', '2'))) + (group + (or + ('symbol', '0') + ('symbol', '2') + ('symbol', '1'))))) + * optimized: + (and + (range + ('symbol', '2') + ('symbol', '0')) + (range + (func + ('symbol', '_list') + ('string', '1\x000\x002')) + (func + ('symbol', '_list') + ('string', '0\x002\x001')))) + * set: + , + > + 1 + + 'A & B' can be rewritten as 'B & A' by weight, but that's fine as long as + the ordering policy is determined before the rewrite; in this example, + 'B' follows the order of the initial set, which is the same order as 'A' + since 'A' also follows the order: + + $ try --optimize 'contains("glob:*") & (2 + 0 + 1)' + (and + (func + ('symbol', 'contains') + ('string', 'glob:*')) + (group + (or + ('symbol', '2') + ('symbol', '0') + ('symbol', '1')))) + * optimized: + (and + (func + ('symbol', '_reorder') + (func + ('symbol', '_list') + ('string', '2\x000\x001'))) + (func + ('symbol', 'contains') + ('string', 'glob:*'))) + * set: + , + > + 0 + 1 + 2 + + and in this example, 'A & B' is rewritten as 'B & A', but 'A' overrides + the order appropriately: + + $ try --optimize 'reverse(contains("glob:*")) & (0 + 2 + 1)' + (and + (func + ('symbol', 'reverse') + (func + ('symbol', 'contains') + ('string', 'glob:*'))) + (group + (or + ('symbol', '0') + ('symbol', '2') + ('symbol', '1')))) + * optimized: + (and + (func + ('symbol', '_reorder') + (func + ('symbol', '_list') + ('string', '0\x002\x001'))) + (func + ('symbol', 'reverse') + (func + ('symbol', 'contains') + ('string', 'glob:*')))) + * set: + , + > + 2 + 1 + 0 + test sort revset --------------------------------------------