Submitter | Boris Feld |
---|---|
Date | Jan. 11, 2019, 11:29 a.m. |
Message ID | <73926c4ab24d6c01723e.1547206150@Laptop-Boris.lan> |
Download | mbox | patch |
Permalink | /patch/37665/ |
State | Accepted |
Headers | show |
Comments
On Fri, 11 Jan 2019 12:29:10 +0100, Boris Feld wrote: > # HG changeset patch > # User Boris Feld <boris.feld@octobus.net> > # Date 1546605681 -3600 > # Fri Jan 04 13:41:21 2019 +0100 > # Node ID 73926c4ab24d6c01723ed050601b134bdc89562f > # Parent 4a56fbdacff33c3985bbb84f2e19ddfbd48ed4fa > # EXP-Topic revs-efficiency > # Available At https://bitbucket.org/octobus/mercurial-devel/ > # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 73926c4ab24d > revset: introduce an API that avoids `formatspec` input serialization The idea looks good. > +class _inputrules(parser.basealiasrules): > + """replace internal input reference by their actual value""" > + > + @classmethod > + def _getalias(cls, inputs, tree): > + if not isinstance(tree, tuple): > + return None > + if tree[0] != 'func': > + return None > + if getsymbol(tree[1]) != _internal_input: > + return None > + idx = int(getsymbol(tree[2])) > + newtree = ('func', > + ('symbol', internal_input_func), > + ('smartset', inputs[idx]) > + ) > + return parser.alias(idx, None, None, newtree), None Perhaps, this can be just ('smartset', ...) node like 'symbol'/'string'. There's not point to convert it to a 'func' unless it can be expressed as a revset string. And if we can probably ditch the specialized _inputrules. See below. > +def formatspecargs(expr, *args): > + """same as formatspec, but preserve some expensive arguments""" > + parsed = _parseargs(expr, args) > + ret = [] > + inputs = [] > + for t, arg in parsed: > + if t is None: > + ret.append(arg) > + if t == 'baseset': > + key = '%s(%d)' % (_internal_input, len(inputs)) > + inputs.append(smartset.baseset(arg)) > + ret.append(key) Maybe we can assign integer (e.g. '$0') for each parameter here, and we can just use the _aliasrules.expand() to replace it. '$x' is invalid in user expression, so it can be used as a reserved symbol. raise ProgrammingError if t is not None nor 'baseset'. > + return (b''.join(ret), inputs) If this function returned a parsed tree, we can hide all the implementation details in it, and we can probably remove the inputs argument from revset.matchany().
On 12/01/2019 06:00, Yuya Nishihara wrote: > On Fri, 11 Jan 2019 12:29:10 +0100, Boris Feld wrote: >> # HG changeset patch >> # User Boris Feld <boris.feld@octobus.net> >> # Date 1546605681 -3600 >> # Fri Jan 04 13:41:21 2019 +0100 >> # Node ID 73926c4ab24d6c01723ed050601b134bdc89562f >> # Parent 4a56fbdacff33c3985bbb84f2e19ddfbd48ed4fa >> # EXP-Topic revs-efficiency >> # Available At https://bitbucket.org/octobus/mercurial-devel/ >> # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 73926c4ab24d >> revset: introduce an API that avoids `formatspec` input serialization > The idea looks good. > >> +class _inputrules(parser.basealiasrules): >> + """replace internal input reference by their actual value""" >> + >> + @classmethod >> + def _getalias(cls, inputs, tree): >> + if not isinstance(tree, tuple): >> + return None >> + if tree[0] != 'func': >> + return None >> + if getsymbol(tree[1]) != _internal_input: >> + return None >> + idx = int(getsymbol(tree[2])) >> + newtree = ('func', >> + ('symbol', internal_input_func), >> + ('smartset', inputs[idx]) >> + ) >> + return parser.alias(idx, None, None, newtree), None > Perhaps, this can be just ('smartset', ...) node like 'symbol'/'string'. > There's not point to convert it to a 'func' unless it can be expressed > as a revset string. > > And if we can probably ditch the specialized _inputrules. See below. We need to combine the smartset with the existing subset. Doing it within a function seems simpler and less impactful. > >> +def formatspecargs(expr, *args): >> + """same as formatspec, but preserve some expensive arguments""" >> + parsed = _parseargs(expr, args) >> + ret = [] >> + inputs = [] >> + for t, arg in parsed: >> + if t is None: >> + ret.append(arg) >> + if t == 'baseset': >> + key = '%s(%d)' % (_internal_input, len(inputs)) >> + inputs.append(smartset.baseset(arg)) >> + ret.append(key) > Maybe we can assign integer (e.g. '$0') for each parameter here, and we can > just use the _aliasrules.expand() to replace it. '$x' is invalid in user > expression, so it can be used as a reserved symbol. Don't we use '$0' for user alias? Also, I'm not sure what's the value of using alias replacement here. Can you detail your reasoning? The alias logic seems pretty tied to having string (that it parses) as replacement, that is why I used a dedicated object. > > raise ProgrammingError if t is not None nor 'baseset'. > >> + return (b''.join(ret), inputs) > If this function returned a parsed tree, we can hide all the implementation > details in it, and we can probably remove the inputs argument from > revset.matchany(). But revset/matchany does not take a parsed tree, does it? Overall. I feel like you have a design adjustment in mind, but I can't figure out the big picture. > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
On Sun, 13 Jan 2019 08:40:41 +0100, Boris FELD wrote: > On 12/01/2019 06:00, Yuya Nishihara wrote: > > On Fri, 11 Jan 2019 12:29:10 +0100, Boris Feld wrote: > >> # HG changeset patch > >> # User Boris Feld <boris.feld@octobus.net> > >> # Date 1546605681 -3600 > >> # Fri Jan 04 13:41:21 2019 +0100 > >> # Node ID 73926c4ab24d6c01723ed050601b134bdc89562f > >> # Parent 4a56fbdacff33c3985bbb84f2e19ddfbd48ed4fa > >> # EXP-Topic revs-efficiency > >> # Available At https://bitbucket.org/octobus/mercurial-devel/ > >> # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 73926c4ab24d > >> revset: introduce an API that avoids `formatspec` input serialization > > The idea looks good. > > > >> +class _inputrules(parser.basealiasrules): > >> + """replace internal input reference by their actual value""" > >> + > >> + @classmethod > >> + def _getalias(cls, inputs, tree): > >> + if not isinstance(tree, tuple): > >> + return None > >> + if tree[0] != 'func': > >> + return None > >> + if getsymbol(tree[1]) != _internal_input: > >> + return None > >> + idx = int(getsymbol(tree[2])) > >> + newtree = ('func', > >> + ('symbol', internal_input_func), > >> + ('smartset', inputs[idx]) > >> + ) > >> + return parser.alias(idx, None, None, newtree), None > > Perhaps, this can be just ('smartset', ...) node like 'symbol'/'string'. > > There's not point to convert it to a 'func' unless it can be expressed > > as a revset string. > > > > And if we can probably ditch the specialized _inputrules. See below. > > We need to combine the smartset with the existing subset. Doing it > within a function seems simpler and less impactful. Well, there's no practical difference between ('func', ..., ('smartset',)) and ('smartset',). For ('func', ..., ('smartset',)), we'll need two functions: symbols[internal_input_func] and methods['smartset']. symbols[internal_input_func] needs to check the argument type, and methods['smartset'] would raise ParseError. For ('smartset',), methods['smartset'] knows 'x' is a smartset, so we can just return 'x & subset' / 'subset & x'. See stringset() for example. > >> +def formatspecargs(expr, *args): > >> + """same as formatspec, but preserve some expensive arguments""" > >> + parsed = _parseargs(expr, args) > >> + ret = [] > >> + inputs = [] > >> + for t, arg in parsed: > >> + if t is None: > >> + ret.append(arg) > >> + if t == 'baseset': > >> + key = '%s(%d)' % (_internal_input, len(inputs)) > >> + inputs.append(smartset.baseset(arg)) > >> + ret.append(key) > > Maybe we can assign integer (e.g. '$0') for each parameter here, and we can > > just use the _aliasrules.expand() to replace it. '$x' is invalid in user > > expression, so it can be used as a reserved symbol. > > Don't we use '$0' for user alias? Yes, but that doesn't matter since 'expr' here isn't an alias expression. > Also, I'm not sure what's the value of using alias replacement here. Can > you detail your reasoning? To get rid of the custom _inputrule class. > The alias logic seems pretty tied to having string (that it parses) as > replacement, that is why I used a dedicated object. So here we have string to parse, b''.join(ret), and replacements, inputs. If b''.join(ret) contained '$0' instead of 'internal(0)', and if inputs were dict of {'$0': alias('$0', ..., ('smartset', inputs[0]))}, we can just feed them to _aliasrules. aliases = build_dict_of_inputs(inputs) tree = _parsewith(b''.join(ret), syminitletters=_aliassyminitletters) tree = _aliasrules.expand(aliases, tree) That's the idea. > > If this function returned a parsed tree, we can hide all the implementation > > details in it, and we can probably remove the inputs argument from > > revset.matchany(). > > But revset/matchany does not take a parsed tree, does it? A parsed tree can be directly passed in to revset.makematcher(). Since repo.revs() is an internal API, there aren't much we have to copy from matchany(): tree = foldconcat(tree) tree = analyze(tree) tree = optimize(tree) return tree
On Sun, 13 Jan 2019 18:53:13 +0900, Yuya Nishihara wrote: > So here we have string to parse, b''.join(ret), and replacements, inputs. > If b''.join(ret) contained '$0' instead of 'internal(0)', and if inputs > were dict of {'$0': alias('$0', ..., ('smartset', inputs[0]))}, we can just > feed them to _aliasrules. > > aliases = build_dict_of_inputs(inputs) > tree = _parsewith(b''.join(ret), syminitletters=_aliassyminitletters) > tree = _aliasrules.expand(aliases, tree) Actually there's a simpler tool, parser.buildtree(). We still need an invalid placeholder symbol that can be parsed, e.g. '$', but we don't have to care about the internals of the alias expansion. tree = _parsewith(b''.join(ret), syminitletters=_aliassyminitletters) tree = parser.buildtree(tree, ('symbol', '$'), ('smartset', inputs[0]), ...)
On 13/01/2019 10:53, Yuya Nishihara wrote: > On Sun, 13 Jan 2019 08:40:41 +0100, Boris FELD wrote: >> On 12/01/2019 06:00, Yuya Nishihara wrote: >>> On Fri, 11 Jan 2019 12:29:10 +0100, Boris Feld wrote: >>>> # HG changeset patch >>>> # User Boris Feld <boris.feld@octobus.net> >>>> # Date 1546605681 -3600 >>>> # Fri Jan 04 13:41:21 2019 +0100 >>>> # Node ID 73926c4ab24d6c01723ed050601b134bdc89562f >>>> # Parent 4a56fbdacff33c3985bbb84f2e19ddfbd48ed4fa >>>> # EXP-Topic revs-efficiency >>>> # Available At https://bitbucket.org/octobus/mercurial-devel/ >>>> # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 73926c4ab24d >>>> revset: introduce an API that avoids `formatspec` input serialization >>> The idea looks good. >>> >>>> +class _inputrules(parser.basealiasrules): >>>> + """replace internal input reference by their actual value""" >>>> + >>>> + @classmethod >>>> + def _getalias(cls, inputs, tree): >>>> + if not isinstance(tree, tuple): >>>> + return None >>>> + if tree[0] != 'func': >>>> + return None >>>> + if getsymbol(tree[1]) != _internal_input: >>>> + return None >>>> + idx = int(getsymbol(tree[2])) >>>> + newtree = ('func', >>>> + ('symbol', internal_input_func), >>>> + ('smartset', inputs[idx]) >>>> + ) >>>> + return parser.alias(idx, None, None, newtree), None >>> Perhaps, this can be just ('smartset', ...) node like 'symbol'/'string'. >>> There's not point to convert it to a 'func' unless it can be expressed >>> as a revset string. >>> >>> And if we can probably ditch the specialized _inputrules. See below. >> We need to combine the smartset with the existing subset. Doing it >> within a function seems simpler and less impactful. > Well, there's no practical difference between ('func', ..., ('smartset',)) > and ('smartset',). > > For ('func', ..., ('smartset',)), we'll need two functions: > symbols[internal_input_func] and methods['smartset']. > symbols[internal_input_func] needs to check the argument type, and > methods['smartset'] would raise ParseError. > > For ('smartset',), methods['smartset'] knows 'x' is a smartset, so we can > just return 'x & subset' / 'subset & x'. See stringset() for example. Just learned about `methods`, nice. This is much simpler. > >>>> +def formatspecargs(expr, *args): >>>> + """same as formatspec, but preserve some expensive arguments""" >>>> + parsed = _parseargs(expr, args) >>>> + ret = [] >>>> + inputs = [] >>>> + for t, arg in parsed: >>>> + if t is None: >>>> + ret.append(arg) >>>> + if t == 'baseset': >>>> + key = '%s(%d)' % (_internal_input, len(inputs)) >>>> + inputs.append(smartset.baseset(arg)) >>>> + ret.append(key) >>> Maybe we can assign integer (e.g. '$0') for each parameter here, and we can >>> just use the _aliasrules.expand() to replace it. '$x' is invalid in user >>> expression, so it can be used as a reserved symbol. >> Don't we use '$0' for user alias? > Yes, but that doesn't matter since 'expr' here isn't an alias expression. > >> Also, I'm not sure what's the value of using alias replacement here. Can >> you detail your reasoning? > To get rid of the custom _inputrule class. > >> The alias logic seems pretty tied to having string (that it parses) as >> replacement, that is why I used a dedicated object. > So here we have string to parse, b''.join(ret), and replacements, inputs. > If b''.join(ret) contained '$0' instead of 'internal(0)', and if inputs > were dict of {'$0': alias('$0', ..., ('smartset', inputs[0]))}, we can just > feed them to _aliasrules. > > aliases = build_dict_of_inputs(inputs) > tree = _parsewith(b''.join(ret), syminitletters=_aliassyminitletters) > tree = _aliasrules.expand(aliases, tree) > > That's the idea. Okay, going that route. > >>> If this function returned a parsed tree, we can hide all the implementation >>> details in it, and we can probably remove the inputs argument from >>> revset.matchany(). >> But revset/matchany does not take a parsed tree, does it? > A parsed tree can be directly passed in to revset.makematcher(). Since > repo.revs() is an internal API, there aren't much we have to copy from > matchany(): > > tree = foldconcat(tree) > tree = analyze(tree) > tree = optimize(tree) > return tree Great, I initially pursued a tree-based approached, but could not figure out a good route. Thanks for the showing us the way. The result is both cleaner and simpler \o/ > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Patch
diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py --- a/mercurial/localrepo.py +++ b/mercurial/localrepo.py @@ -1362,8 +1362,8 @@ class localrepository(object): Returns a revset.abstractsmartset, which is a list-like interface that contains integer revisions. ''' - expr = revsetlang.formatspec(expr, *args) - m = revset.match(None, expr) + expr, inputs = revsetlang.formatspecargs(expr, *args) + m = revset.matchany(None, [expr], inputs=inputs) return m(self) def set(self, expr, *args): diff --git a/mercurial/revset.py b/mercurial/revset.py --- a/mercurial/revset.py +++ b/mercurial/revset.py @@ -2194,6 +2194,14 @@ def _hexlist(repo, subset, x, order): else: return _orderedhexlist(repo, subset, x) +@predicate(revsetlang.internal_input_func, takeorder=True) +def _internal_input(repo, subset, x, order): + # access subtituted value during internal revset runs + if order == followorder: + return subset & x[1] + else: + return x[1] & subset + methods = { "range": rangeset, "rangeall": rangeall, @@ -2230,7 +2238,7 @@ def match(ui, spec, lookup=None): """Create a matcher for a single revision spec""" return matchany(ui, [spec], lookup=lookup) -def matchany(ui, specs, lookup=None, localalias=None): +def matchany(ui, specs, lookup=None, localalias=None, inputs=()): """Create a matcher that will include any revisions matching one of the given specs @@ -2239,6 +2247,9 @@ def matchany(ui, specs, lookup=None, loc If localalias is not None, it is a dict {name: definitionstring}. It takes precedence over [revsetalias] config section. + + inputs containts value for __internal_input__ reference. This is used by + internal revset runs. """ if not specs: def mfunc(repo, subset=None): @@ -2261,6 +2272,8 @@ def matchany(ui, specs, lookup=None, loc aliases.extend(localalias.items()) if aliases: tree = revsetlang.expandaliases(tree, aliases, warn=warn) + if inputs: + tree = revsetlang.expandinputs(inputs, tree) tree = revsetlang.foldconcat(tree) tree = revsetlang.analyze(tree) tree = revsetlang.optimize(tree) diff --git a/mercurial/revsetlang.py b/mercurial/revsetlang.py --- a/mercurial/revsetlang.py +++ b/mercurial/revsetlang.py @@ -69,6 +69,9 @@ symbols = {} # default set of valid characters for non-initial letters of symbols _symletters = _syminitletters | set(pycompat.iterbytestr('-/')) +_internal_input = '__internal_input_placeholder__' +internal_input_func = '__internal_input__' + def tokenize(program, lookup=None, syminitletters=None, symletters=None): ''' Parse a revset statement into a stream of tokens @@ -333,7 +336,7 @@ def _analyze(x): elif op == 'negate': s = getstring(x[1], _("can't negate that")) return _analyze(('string', '-' + s)) - elif op in ('string', 'symbol'): + elif op in ('string', 'symbol', 'smartset'): return x elif op == 'rangeall': return (op, None) @@ -373,7 +376,7 @@ def _optimize(x): return 0, x op = x[0] - if op in ('string', 'symbol'): + if op in ('string', 'symbol', 'smartset'): return 0.5, x # single revisions are small elif op == 'and': wa, ta = _optimize(x[1]) @@ -532,6 +535,26 @@ def expandaliases(tree, aliases, warn=No alias.warned = True return tree +class _inputrules(parser.basealiasrules): + """replace internal input reference by their actual value""" + + @classmethod + def _getalias(cls, inputs, tree): + if not isinstance(tree, tuple): + return None + if tree[0] != 'func': + return None + if getsymbol(tree[1]) != _internal_input: + return None + idx = int(getsymbol(tree[2])) + newtree = ('func', + ('symbol', internal_input_func), + ('smartset', inputs[idx]) + ) + return parser.alias(idx, None, None, newtree), None + +expandinputs = _inputrules.expand + def foldconcat(tree): """Fold elements to be concatenated by `##` """ @@ -686,12 +709,23 @@ def formatspec(expr, *args): if t == 'baseset': if isinstance(arg, set): arg = sorted(arg) - try: - ret.append(_formatintlist(list(arg))) - except (TypeError, ValueError): - raise error.ParseError(_('invalid argument for revspec')) + ret.append(_formatintlist(list(arg))) return b''.join(ret) +def formatspecargs(expr, *args): + """same as formatspec, but preserve some expensive arguments""" + parsed = _parseargs(expr, args) + ret = [] + inputs = [] + for t, arg in parsed: + if t is None: + ret.append(arg) + if t == 'baseset': + key = '%s(%d)' % (_internal_input, len(inputs)) + inputs.append(smartset.baseset(arg)) + ret.append(key) + return (b''.join(ret), inputs) + def _parseargs(expr, args): """parse the expression and replace all inexpensive args