Submitter | Yuya Nishihara |
---|---|

Date | June 3, 2016, 3:02 p.m. |

Message ID | <f3c0deae1fd126d73409.1464966145@mimosa> |

Download | mbox | patch |

Permalink | /patch/15373/ |

State | Changes Requested |

Delegated to: | Augie Fackler |

Headers | show |

## Comments

On Sat, Jun 04, 2016 at 12:02:25AM +0900, Yuya Nishihara wrote: > # HG changeset patch > # User Yuya Nishihara <yuya@tcha.org> > # Date 1455627736 -32400 > # Tue Feb 16 22:02:16 2016 +0900 > # Node ID f3c0deae1fd126d7340929c0c0612b88a4f5aa0e > # Parent d200ad9058574d91a2b2b7a438602cb34b03ddbe > revset: teach optimize if current expression should define/follow order > > New flag 'order' will be used to fix a couple of ordering bugs, such as: > > a) 'x & (y | z)' disregards the order of 'x' (issue 5100) > b) 'x & y:z' is listed from 'y' to 'z' > c) 'x & y' can be rewritten as 'y & x' if weight(x) > weight(y) > > (c) is a hidden bug of optimize(), which could be exposed if we've fixed > (a) and (b) in some way. (a) and (b) are not bugs of optimize(), but they > can be addressed well in optimize() since optimize() knows the context. > > 'order' is tri-state. It starts with 'define', and shifts to 'follow' by > 'x & y'. It changes back to 'define' on function call 'f(x)' or function-like > operation 'x (f) y' because 'f' may have its own ordering requirement for 'x' > and 'y'. The state 'any' will allow us to avoid extra cost that would be > necessary to constrain ordering where it isn't important, 'not x'. For details, > see also the tests added by the next patch. I'm going to duplicate what you said and see if I understand correctly: If 'order' is set to 'define', then any future binary operator can change the ordering of the entries in the set. If 'order' is set to 'follow', then any future binary operators should take the ordering specified by the first operand to the binary operator. Right? I'm not sure I follow 'any' yet, but I haven't looked at that patch either. > > The initial order will be switched whether an initial set is provided or not: > > m(repo) # 'define': order is not defined yet > m(repo, subset) # 'follow': order should be defined by subset > > diff --git a/mercurial/revset.py b/mercurial/revset.py > --- a/mercurial/revset.py > +++ b/mercurial/revset.py > @@ -2067,6 +2067,24 @@ methods = { > "parentpost": p1, > } > > +# constants for ordering policy, used in _optimize(): > +# 1. starts with 'define' > +# 2. shifts to 'follow' by 'x & y' > +# 3. changes back to 'define' on function call 'f(x)' or function-like > +# operation 'x (f) y' because 'f' may have its own ordering requirement > +# for 'x' and 'y' (e.g. 'first(x)') > +# 4. 'any' avoids extra cost of ordering where it isn't important, 'not x' > +_anyorder = 'any' # don't care the order (e.g. 'not x') > +_defineorder = 'define' # should define the order (e.g. 'x', '_(x)', 'x + y') > +_followorder = 'follow' # must follow the current order (e.g. '_ & y') > + > +# transition table for 'x & y', from the current expression 'x' to 'y' > +_tofolloworder = { > + _anyorder: _anyorder, > + _defineorder: _followorder, > + _followorder: _followorder, > +} > + > def _matchonly(revs, bases): > """ > >>> f = lambda *args: _matchonly(*map(parse, args)) > @@ -2082,7 +2100,11 @@ def _matchonly(revs, bases): > and getstring(bases[1][1], _('not a symbol')) == 'ancestors'): > return ('list', revs[2], bases[1][2]) > > -def _optimize(x, small): > +def _optimize(x, small, order): > + """ > + 'order' specifies how the current expression 'x' is ordered (see the > + constants defined above) > + """ > if x is None: > return 0, x > > @@ -2092,30 +2114,32 @@ def _optimize(x, small): > > op = x[0] > if op == 'minus': > - return _optimize(('and', x[1], ('not', x[2])), small) > + return _optimize(('and', x[1], ('not', x[2])), small, order) > elif op == 'only': > t = ('func', ('symbol', 'only'), ('list', x[1], x[2])) > - return _optimize(t, small) > + return _optimize(t, small, order) > elif op == 'onlypost': > - return _optimize(('func', ('symbol', 'only'), x[1]), small) > + return _optimize(('func', ('symbol', 'only'), x[1]), small, order) > elif op == 'dagrangepre': > - return _optimize(('func', ('symbol', 'ancestors'), x[1]), small) > + return _optimize(('func', ('symbol', 'ancestors'), x[1]), small, order) > elif op == 'dagrangepost': > - return _optimize(('func', ('symbol', 'descendants'), x[1]), small) > + t = ('func', ('symbol', 'descendants'), x[1]) > + return _optimize(t, small, order) > elif op == 'rangeall': > - return _optimize(('range', ('string', '0'), ('string', 'tip')), small) > + t = ('range', ('string', '0'), ('string', 'tip')) > + return _optimize(t, small, order) > elif op == 'rangepre': > - return _optimize(('range', ('string', '0'), x[1]), small) > + return _optimize(('range', ('string', '0'), x[1]), small, order) > elif op == 'rangepost': > - return _optimize(('range', x[1], ('string', 'tip')), small) > + return _optimize(('range', x[1], ('string', 'tip')), small, order) > elif op == 'negate': > s = getstring(x[1], _("can't negate that")) > - return _optimize(('string', '-' + s), small) > + return _optimize(('string', '-' + s), small, order) > elif op in 'string symbol negate': > return smallbonus, x # single revisions are small > elif op == 'and': > - wa, ta = _optimize(x[1], True) > - wb, tb = _optimize(x[2], True) > + wa, ta = _optimize(x[1], True, order) > + wb, tb = _optimize(x[2], True, _tofolloworder[order]) > w = min(wa, wb) > > # (::x and not ::y)/(not ::y and ::x) have a fast path > @@ -2141,12 +2165,12 @@ def _optimize(x, small): > else: > s = '\0'.join(t[1] for w, t in ss) > y = ('func', ('symbol', '_list'), ('string', s)) > - w, t = _optimize(y, False) > + w, t = _optimize(y, False, order) > ws.append(w) > ts.append(t) > del ss[:] > for y in x[1:]: > - w, t = _optimize(y, False) > + w, t = _optimize(y, False, order) > if t is not None and (t[0] == 'string' or t[0] == 'symbol'): > ss.append((w, t)) > continue > @@ -2164,34 +2188,34 @@ def _optimize(x, small): > # Optimize not public() to _notpublic() because we have a fast version > if x[1] == ('func', ('symbol', 'public'), None): > newsym = ('func', ('symbol', '_notpublic'), None) > - o = _optimize(newsym, not small) > + o = _optimize(newsym, not small, order) > return o[0], o[1] > else: > - o = _optimize(x[1], not small) > + o = _optimize(x[1], not small, _anyorder) > return o[0], (op, o[1]) > elif op == 'parentpost': > - o = _optimize(x[1], small) > + o = _optimize(x[1], small, _defineorder) > return o[0], (op, o[1]) > elif op == 'group': > - return _optimize(x[1], small) > + return _optimize(x[1], small, order) > elif op in 'dagrange range parent ancestorspec': > if op == 'parent': > # x^:y means (x^) : y, not x ^ (:y) > post = ('parentpost', x[1]) > if x[2][0] == 'dagrangepre': > - return _optimize(('dagrange', post, x[2][1]), small) > + return _optimize(('dagrange', post, x[2][1]), small, order) > elif x[2][0] == 'rangepre': > - return _optimize(('range', post, x[2][1]), small) > - > - wa, ta = _optimize(x[1], small) > - wb, tb = _optimize(x[2], small) > + return _optimize(('range', post, x[2][1]), small, order) > + > + wa, ta = _optimize(x[1], small, _defineorder) > + wb, tb = _optimize(x[2], small, _defineorder) > return wa + wb, (op, ta, tb) > elif op == 'list': > - ws, ts = zip(*(_optimize(y, small) for y in x[1:])) > + ws, ts = zip(*(_optimize(y, small, order) for y in x[1:])) > return sum(ws), (op,) + ts > elif op == 'func': > f = getstring(x[1], _("not a symbol")) > - wa, ta = _optimize(x[2], small) > + wa, ta = _optimize(x[2], small, _defineorder) > if f in ("author branch closed date desc file grep keyword " > "outgoing user"): > w = 10 # slow > @@ -2211,7 +2235,7 @@ def _optimize(x, small): > return 1, x > > def optimize(tree): > - _weight, newtree = _optimize(tree, small=True) > + _weight, newtree = _optimize(tree, small=True, order=_defineorder) > return newtree > > # the set of valid characters for the initial letter of symbols in > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel

On Mon, 6 Jun 2016 12:21:23 -0400, Augie Fackler wrote: > On Sat, Jun 04, 2016 at 12:02:25AM +0900, Yuya Nishihara wrote: > > # HG changeset patch > > # User Yuya Nishihara <yuya@tcha.org> > > # Date 1455627736 -32400 > > # Tue Feb 16 22:02:16 2016 +0900 > > # Node ID f3c0deae1fd126d7340929c0c0612b88a4f5aa0e > > # Parent d200ad9058574d91a2b2b7a438602cb34b03ddbe > > revset: teach optimize if current expression should define/follow order > > > > New flag 'order' will be used to fix a couple of ordering bugs, such as: > > > > a) 'x & (y | z)' disregards the order of 'x' (issue 5100) > > b) 'x & y:z' is listed from 'y' to 'z' > > c) 'x & y' can be rewritten as 'y & x' if weight(x) > weight(y) > > > > (c) is a hidden bug of optimize(), which could be exposed if we've fixed > > (a) and (b) in some way. (a) and (b) are not bugs of optimize(), but they > > can be addressed well in optimize() since optimize() knows the context. > > > > 'order' is tri-state. It starts with 'define', and shifts to 'follow' by > > 'x & y'. It changes back to 'define' on function call 'f(x)' or function-like > > operation 'x (f) y' because 'f' may have its own ordering requirement for 'x' > > and 'y'. The state 'any' will allow us to avoid extra cost that would be > > necessary to constrain ordering where it isn't important, 'not x'. For details, > > see also the tests added by the next patch. > > I'm going to duplicate what you said and see if I understand correctly: > > If 'order' is set to 'define', then any future binary operator can > change the ordering of the entries in the set. If 'order' is set to > 'follow', then any future binary operators should take the ordering > specified by the first operand to the binary operator. Right? s/any future binary operators/any future functions and operations/ s/the first operand to the binary operator/the first operand to the '&'/ For instance, "X & (Y | Z)" is evaluated as "or(y(x()), z(x()))". In this case, "x()" can change the order of the entries in the set, but "y()", "z()" and "or()" shouldn't. Because "or()" is known to change the order, we'll need to fix the result of "or()". > I'm not sure I follow 'any' yet, but I haven't looked at that patch either. 'any' means the order doesn't matter. For "not X", "x()" may change the order of the entries in the initial set. So for "not (x | y)", we don't need to fix the result of "or()". And "not sort(x)" can be optimized to "not x".

On Tue, Jun 07, 2016 at 10:16:27PM +0900, Yuya Nishihara wrote: > On Mon, 6 Jun 2016 12:21:23 -0400, Augie Fackler wrote: > > On Sat, Jun 04, 2016 at 12:02:25AM +0900, Yuya Nishihara wrote: > > > # HG changeset patch > > > # User Yuya Nishihara <yuya@tcha.org> > > > # Date 1455627736 -32400 > > > # Tue Feb 16 22:02:16 2016 +0900 > > > # Node ID f3c0deae1fd126d7340929c0c0612b88a4f5aa0e > > > # Parent d200ad9058574d91a2b2b7a438602cb34b03ddbe > > > revset: teach optimize if current expression should define/follow order > > > > > > New flag 'order' will be used to fix a couple of ordering bugs, such as: > > > > > > a) 'x & (y | z)' disregards the order of 'x' (issue 5100) > > > b) 'x & y:z' is listed from 'y' to 'z' > > > c) 'x & y' can be rewritten as 'y & x' if weight(x) > weight(y) > > > > > > (c) is a hidden bug of optimize(), which could be exposed if we've fixed > > > (a) and (b) in some way. (a) and (b) are not bugs of optimize(), but they > > > can be addressed well in optimize() since optimize() knows the context. > > > > > > 'order' is tri-state. It starts with 'define', and shifts to 'follow' by > > > 'x & y'. It changes back to 'define' on function call 'f(x)' or function-like > > > operation 'x (f) y' because 'f' may have its own ordering requirement for 'x' > > > and 'y'. The state 'any' will allow us to avoid extra cost that would be > > > necessary to constrain ordering where it isn't important, 'not x'. For details, > > > see also the tests added by the next patch. > > > > I'm going to duplicate what you said and see if I understand correctly: > > > > If 'order' is set to 'define', then any future binary operator can > > change the ordering of the entries in the set. If 'order' is set to > > 'follow', then any future binary operators should take the ordering > > specified by the first operand to the binary operator. Right? > > s/any future binary operators/any future functions and operations/ > s/the first operand to the binary operator/the first operand to the '&'/ > > For instance, "X & (Y | Z)" is evaluated as "or(y(x()), z(x()))". In this > case, "x()" can change the order of the entries in the set, but "y()", "z()" > and "or()" shouldn't. Because "or()" is known to change the order, we'll need > to fix the result of "or()". > > > I'm not sure I follow 'any' yet, but I haven't looked at that patch either. > > 'any' means the order doesn't matter. For "not X", "x()" may change the order > of the entries in the initial set. So for "not (x | y)", we don't need to fix > the result of "or()". And "not sort(x)" can be optimized to "not x". > Ah, okay. Could I persuade you to send an updated version of this with the above clarification in comments near the relevant constants? I think that would greatly help future code archaeology. > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel

On Thu, 9 Jun 2016 23:43:57 -0400, Augie Fackler wrote: > On Tue, Jun 07, 2016 at 10:16:27PM +0900, Yuya Nishihara wrote: > > On Mon, 6 Jun 2016 12:21:23 -0400, Augie Fackler wrote: > > > On Sat, Jun 04, 2016 at 12:02:25AM +0900, Yuya Nishihara wrote: > > > > # HG changeset patch > > > > # User Yuya Nishihara <yuya@tcha.org> > > > > # Date 1455627736 -32400 > > > > # Tue Feb 16 22:02:16 2016 +0900 > > > > # Node ID f3c0deae1fd126d7340929c0c0612b88a4f5aa0e > > > > # Parent d200ad9058574d91a2b2b7a438602cb34b03ddbe > > > > revset: teach optimize if current expression should define/follow order > > > > > > > > New flag 'order' will be used to fix a couple of ordering bugs, such as: > > > > > > > > a) 'x & (y | z)' disregards the order of 'x' (issue 5100) > > > > b) 'x & y:z' is listed from 'y' to 'z' > > > > c) 'x & y' can be rewritten as 'y & x' if weight(x) > weight(y) > > > > > > > > (c) is a hidden bug of optimize(), which could be exposed if we've fixed > > > > (a) and (b) in some way. (a) and (b) are not bugs of optimize(), but they > > > > can be addressed well in optimize() since optimize() knows the context. > > > > > > > > 'order' is tri-state. It starts with 'define', and shifts to 'follow' by > > > > 'x & y'. It changes back to 'define' on function call 'f(x)' or function-like > > > > operation 'x (f) y' because 'f' may have its own ordering requirement for 'x' > > > > and 'y'. The state 'any' will allow us to avoid extra cost that would be > > > > necessary to constrain ordering where it isn't important, 'not x'. For details, > > > > see also the tests added by the next patch. > > > > > > I'm going to duplicate what you said and see if I understand correctly: > > > > > > If 'order' is set to 'define', then any future binary operator can > > > change the ordering of the entries in the set. If 'order' is set to > > > 'follow', then any future binary operators should take the ordering > > > specified by the first operand to the binary operator. Right? > > > > s/any future binary operators/any future functions and operations/ > > s/the first operand to the binary operator/the first operand to the '&'/ > > > > For instance, "X & (Y | Z)" is evaluated as "or(y(x()), z(x()))". In this > > case, "x()" can change the order of the entries in the set, but "y()", "z()" > > and "or()" shouldn't. Because "or()" is known to change the order, we'll need > > to fix the result of "or()". > > > > > I'm not sure I follow 'any' yet, but I haven't looked at that patch either. > > > > 'any' means the order doesn't matter. For "not X", "x()" may change the order > > of the entries in the initial set. So for "not (x | y)", we don't need to fix > > the result of "or()". And "not sort(x)" can be optimized to "not x". > > > > Ah, okay. Could I persuade you to send an updated version of this with > the above clarification in comments near the relevant constants? I > think that would greatly help future code archaeology. Ok, my last explanation is not perfect, but it will be better than nothing. I'll resend after Martijn's changes are queued.

> On Jun 10, 2016, at 9:32 AM, Yuya Nishihara <yuya@tcha.org> wrote: > > Ok, my last explanation is not perfect, but it will be better than nothing. > I'll resend after Martijn's changes are queued. Thanks. I couldnâ€™t really come up with a better explanation than what you gave, but it helped a lot.

## Patch

diff --git a/mercurial/revset.py b/mercurial/revset.py --- a/mercurial/revset.py +++ b/mercurial/revset.py @@ -2067,6 +2067,24 @@ methods = { "parentpost": p1, } +# constants for ordering policy, used in _optimize(): +# 1. starts with 'define' +# 2. shifts to 'follow' by 'x & y' +# 3. changes back to 'define' on function call 'f(x)' or function-like +# operation 'x (f) y' because 'f' may have its own ordering requirement +# for 'x' and 'y' (e.g. 'first(x)') +# 4. 'any' avoids extra cost of ordering where it isn't important, 'not x' +_anyorder = 'any' # don't care the order (e.g. 'not x') +_defineorder = 'define' # should define the order (e.g. 'x', '_(x)', 'x + y') +_followorder = 'follow' # must follow the current order (e.g. '_ & y') + +# transition table for 'x & y', from the current expression 'x' to 'y' +_tofolloworder = { + _anyorder: _anyorder, + _defineorder: _followorder, + _followorder: _followorder, +} + def _matchonly(revs, bases): """ >>> f = lambda *args: _matchonly(*map(parse, args)) @@ -2082,7 +2100,11 @@ def _matchonly(revs, bases): and getstring(bases[1][1], _('not a symbol')) == 'ancestors'): return ('list', revs[2], bases[1][2]) -def _optimize(x, small): +def _optimize(x, small, order): + """ + 'order' specifies how the current expression 'x' is ordered (see the + constants defined above) + """ if x is None: return 0, x @@ -2092,30 +2114,32 @@ def _optimize(x, small): op = x[0] if op == 'minus': - return _optimize(('and', x[1], ('not', x[2])), small) + return _optimize(('and', x[1], ('not', x[2])), small, order) elif op == 'only': t = ('func', ('symbol', 'only'), ('list', x[1], x[2])) - return _optimize(t, small) + return _optimize(t, small, order) elif op == 'onlypost': - return _optimize(('func', ('symbol', 'only'), x[1]), small) + return _optimize(('func', ('symbol', 'only'), x[1]), small, order) elif op == 'dagrangepre': - return _optimize(('func', ('symbol', 'ancestors'), x[1]), small) + return _optimize(('func', ('symbol', 'ancestors'), x[1]), small, order) elif op == 'dagrangepost': - return _optimize(('func', ('symbol', 'descendants'), x[1]), small) + t = ('func', ('symbol', 'descendants'), x[1]) + return _optimize(t, small, order) elif op == 'rangeall': - return _optimize(('range', ('string', '0'), ('string', 'tip')), small) + t = ('range', ('string', '0'), ('string', 'tip')) + return _optimize(t, small, order) elif op == 'rangepre': - return _optimize(('range', ('string', '0'), x[1]), small) + return _optimize(('range', ('string', '0'), x[1]), small, order) elif op == 'rangepost': - return _optimize(('range', x[1], ('string', 'tip')), small) + return _optimize(('range', x[1], ('string', 'tip')), small, order) elif op == 'negate': s = getstring(x[1], _("can't negate that")) - return _optimize(('string', '-' + s), small) + return _optimize(('string', '-' + s), small, order) elif op in 'string symbol negate': return smallbonus, x # single revisions are small elif op == 'and': - wa, ta = _optimize(x[1], True) - wb, tb = _optimize(x[2], True) + wa, ta = _optimize(x[1], True, order) + wb, tb = _optimize(x[2], True, _tofolloworder[order]) w = min(wa, wb) # (::x and not ::y)/(not ::y and ::x) have a fast path @@ -2141,12 +2165,12 @@ def _optimize(x, small): else: s = '\0'.join(t[1] for w, t in ss) y = ('func', ('symbol', '_list'), ('string', s)) - w, t = _optimize(y, False) + w, t = _optimize(y, False, order) ws.append(w) ts.append(t) del ss[:] for y in x[1:]: - w, t = _optimize(y, False) + w, t = _optimize(y, False, order) if t is not None and (t[0] == 'string' or t[0] == 'symbol'): ss.append((w, t)) continue @@ -2164,34 +2188,34 @@ def _optimize(x, small): # Optimize not public() to _notpublic() because we have a fast version if x[1] == ('func', ('symbol', 'public'), None): newsym = ('func', ('symbol', '_notpublic'), None) - o = _optimize(newsym, not small) + o = _optimize(newsym, not small, order) return o[0], o[1] else: - o = _optimize(x[1], not small) + o = _optimize(x[1], not small, _anyorder) return o[0], (op, o[1]) elif op == 'parentpost': - o = _optimize(x[1], small) + o = _optimize(x[1], small, _defineorder) return o[0], (op, o[1]) elif op == 'group': - return _optimize(x[1], small) + return _optimize(x[1], small, order) elif op in 'dagrange range parent ancestorspec': if op == 'parent': # x^:y means (x^) : y, not x ^ (:y) post = ('parentpost', x[1]) if x[2][0] == 'dagrangepre': - return _optimize(('dagrange', post, x[2][1]), small) + return _optimize(('dagrange', post, x[2][1]), small, order) elif x[2][0] == 'rangepre': - return _optimize(('range', post, x[2][1]), small) - - wa, ta = _optimize(x[1], small) - wb, tb = _optimize(x[2], small) + return _optimize(('range', post, x[2][1]), small, order) + + wa, ta = _optimize(x[1], small, _defineorder) + wb, tb = _optimize(x[2], small, _defineorder) return wa + wb, (op, ta, tb) elif op == 'list': - ws, ts = zip(*(_optimize(y, small) for y in x[1:])) + ws, ts = zip(*(_optimize(y, small, order) for y in x[1:])) return sum(ws), (op,) + ts elif op == 'func': f = getstring(x[1], _("not a symbol")) - wa, ta = _optimize(x[2], small) + wa, ta = _optimize(x[2], small, _defineorder) if f in ("author branch closed date desc file grep keyword " "outgoing user"): w = 10 # slow @@ -2211,7 +2235,7 @@ def _optimize(x, small): return 1, x def optimize(tree): - _weight, newtree = _optimize(tree, small=True) + _weight, newtree = _optimize(tree, small=True, order=_defineorder) return newtree # the set of valid characters for the initial letter of symbols in