Patchwork [1,of,2] dagop: split module hosting DAG-related algorithms from revset

login
register
mail settings
Submitter Yuya Nishihara
Date June 17, 2017, 3:59 p.m.
Message ID <ea5c0d4b4f3ff4b78218.1497715162@mimosa>
Download mbox | patch
Permalink /patch/21462/
State Accepted
Headers show

Comments

Yuya Nishihara - June 17, 2017, 3:59 p.m.
# HG changeset patch
# User Yuya Nishihara <yuya@tcha.org>
# Date 1476608604 -32400
#      Sun Oct 16 18:03:24 2016 +0900
# Node ID ea5c0d4b4f3ff4b78218d21e29b8c2068f78907f
# Parent  9d472b219fb07e011c7a6255c5be47e6fc66229c
dagop: split module hosting DAG-related algorithms from revset

This module hosts the following functions. They are somewhat similar (e.g.
scanning revisions using a heap queue or a stack) and seem non-trivial from an
algorithmic point of view.

 - _revancestors()
 - _revdescendants()
 - reachableroots()
 - _toposort()

I was thinking of adding a revset._fileancestors() generator for a better
follow() implementation, but it would be called from context.py as well. So I
decided to create a new module.

Naming is hard. I couldn't come up with any better module name, so it's called
"dag operation" now. I rejected the following candidates:

 - ancestor.py - existing, revlog-level DAG algorithm
 - ancestorset.py - doesn't always return a set
 - dagalgorithm.py - hard to type
 - dagutil.py - existing
 - revancestor.py - I want to add fileancestors()

  % wc -l mercurial/dagop.py mercurial/revset.py
    339 mercurial/dagop.py
   2020 mercurial/revset.py
   2359 total

Patch

diff --git a/mercurial/revset.py b/mercurial/dagop.py
copy from mercurial/revset.py
copy to mercurial/dagop.py
--- a/mercurial/revset.py
+++ b/mercurial/dagop.py
@@ -1,4 +1,4 @@ 
-# revset.py - revision set queries for mercurial
+# dagop.py - graph ancestry and topology algorithm for revset
 #
 # Copyright 2010 Matt Mackall <mpm@selenic.com>
 #
@@ -8,48 +8,17 @@ 
 from __future__ import absolute_import
 
 import heapq
-import re
 
-from .i18n import _
 from . import (
-    destutil,
-    encoding,
     error,
-    hbisect,
-    match as matchmod,
     node,
-    obsolete as obsmod,
-    pathutil,
-    phases,
-    registrar,
-    repoview,
-    revsetlang,
-    scmutil,
     smartset,
-    util,
 )
 
-# helpers for processing parsed tree
-getsymbol = revsetlang.getsymbol
-getstring = revsetlang.getstring
-getinteger = revsetlang.getinteger
-getboolean = revsetlang.getboolean
-getlist = revsetlang.getlist
-getrange = revsetlang.getrange
-getargs = revsetlang.getargs
-getargsdict = revsetlang.getargsdict
-
-# constants used as an argument of match() and matchany()
-anyorder = revsetlang.anyorder
-defineorder = revsetlang.defineorder
-followorder = revsetlang.followorder
-
 baseset = smartset.baseset
 generatorset = smartset.generatorset
-spanset = smartset.spanset
-fullreposet = smartset.fullreposet
 
-def _revancestors(repo, revs, followfirst):
+def revancestors(repo, revs, followfirst):
     """Like revlog.ancestors(), but supports followfirst."""
     if followfirst:
         cut = 1
@@ -87,7 +56,7 @@  def _revancestors(repo, revs, followfirs
 
     return generatorset(iterate(), iterasc=False)
 
-def _revdescendants(repo, revs, followfirst):
+def revdescendants(repo, revs, followfirst):
     """Like revlog.descendants() but supports followfirst."""
     if followfirst:
         cut = 1
@@ -171,1705 +140,7 @@  def reachableroots(repo, roots, heads, i
     revs.sort()
     return revs
 
-# helpers
-
-def getset(repo, subset, x):
-    if not x:
-        raise error.ParseError(_("missing argument"))
-    return methods[x[0]](repo, subset, *x[1:])
-
-def _getrevsource(repo, r):
-    extra = repo[r].extra()
-    for label in ('source', 'transplant_source', 'rebase_source'):
-        if label in extra:
-            try:
-                return repo[extra[label]].rev()
-            except error.RepoLookupError:
-                pass
-    return None
-
-# operator methods
-
-def stringset(repo, subset, x):
-    x = scmutil.intrev(repo[x])
-    if (x in subset
-        or x == node.nullrev and isinstance(subset, fullreposet)):
-        return baseset([x])
-    return baseset()
-
-def rangeset(repo, subset, x, y, order):
-    m = getset(repo, fullreposet(repo), x)
-    n = getset(repo, fullreposet(repo), y)
-
-    if not m or not n:
-        return baseset()
-    return _makerangeset(repo, subset, m.first(), n.last(), order)
-
-def rangeall(repo, subset, x, order):
-    assert x is None
-    return _makerangeset(repo, subset, 0, len(repo) - 1, order)
-
-def rangepre(repo, subset, y, order):
-    # ':y' can't be rewritten to '0:y' since '0' may be hidden
-    n = getset(repo, fullreposet(repo), y)
-    if not n:
-        return baseset()
-    return _makerangeset(repo, subset, 0, n.last(), order)
-
-def rangepost(repo, subset, x, order):
-    m = getset(repo, fullreposet(repo), x)
-    if not m:
-        return baseset()
-    return _makerangeset(repo, subset, m.first(), len(repo) - 1, order)
-
-def _makerangeset(repo, subset, m, n, order):
-    if m == n:
-        r = baseset([m])
-    elif n == node.wdirrev:
-        r = spanset(repo, m, len(repo)) + baseset([n])
-    elif m == node.wdirrev:
-        r = baseset([m]) + spanset(repo, len(repo) - 1, n - 1)
-    elif m < n:
-        r = spanset(repo, m, n + 1)
-    else:
-        r = spanset(repo, m, n - 1)
-
-    if order == defineorder:
-        return r & subset
-    else:
-        # carrying the sorting over when possible would be more efficient
-        return subset & r
-
-def dagrange(repo, subset, x, y, order):
-    r = fullreposet(repo)
-    xs = reachableroots(repo, getset(repo, r, x), getset(repo, r, y),
-                         includepath=True)
-    return subset & xs
-
-def andset(repo, subset, x, y, order):
-    return getset(repo, getset(repo, subset, x), y)
-
-def differenceset(repo, subset, x, y, order):
-    return getset(repo, subset, x) - getset(repo, subset, y)
-
-def _orsetlist(repo, subset, xs):
-    assert xs
-    if len(xs) == 1:
-        return getset(repo, subset, xs[0])
-    p = len(xs) // 2
-    a = _orsetlist(repo, subset, xs[:p])
-    b = _orsetlist(repo, subset, xs[p:])
-    return a + b
-
-def orset(repo, subset, x, order):
-    xs = getlist(x)
-    if order == followorder:
-        # slow path to take the subset order
-        return subset & _orsetlist(repo, fullreposet(repo), xs)
-    else:
-        return _orsetlist(repo, subset, xs)
-
-def notset(repo, subset, x, order):
-    return subset - getset(repo, subset, x)
-
-def listset(repo, subset, *xs):
-    raise error.ParseError(_("can't use a list in this context"),
-                           hint=_('see hg help "revsets.x or y"'))
-
-def keyvaluepair(repo, subset, k, v):
-    raise error.ParseError(_("can't use a key-value pair in this context"))
-
-def func(repo, subset, a, b, order):
-    f = getsymbol(a)
-    if f in symbols:
-        func = symbols[f]
-        if getattr(func, '_takeorder', False):
-            return func(repo, subset, b, order)
-        return func(repo, subset, b)
-
-    keep = lambda fn: getattr(fn, '__doc__', None) is not None
-
-    syms = [s for (s, fn) in symbols.items() if keep(fn)]
-    raise error.UnknownIdentifier(f, syms)
-
-# functions
-
-# symbols are callables like:
-#   fn(repo, subset, x)
-# with:
-#   repo - current repository instance
-#   subset - of revisions to be examined
-#   x - argument in tree form
-symbols = {}
-
-# symbols which can't be used for a DoS attack for any given input
-# (e.g. those which accept regexes as plain strings shouldn't be included)
-# functions that just return a lot of changesets (like all) don't count here
-safesymbols = set()
-
-predicate = registrar.revsetpredicate()
-
-@predicate('_destupdate')
-def _destupdate(repo, subset, x):
-    # experimental revset for update destination
-    args = getargsdict(x, 'limit', 'clean')
-    return subset & baseset([destutil.destupdate(repo, **args)[0]])
-
-@predicate('_destmerge')
-def _destmerge(repo, subset, x):
-    # experimental revset for merge destination
-    sourceset = None
-    if x is not None:
-        sourceset = getset(repo, fullreposet(repo), x)
-    return subset & baseset([destutil.destmerge(repo, sourceset=sourceset)])
-
-@predicate('adds(pattern)', safe=True)
-def adds(repo, subset, x):
-    """Changesets that add a file matching pattern.
-
-    The pattern without explicit kind like ``glob:`` is expected to be
-    relative to the current directory and match against a file or a
-    directory.
-    """
-    # i18n: "adds" is a keyword
-    pat = getstring(x, _("adds requires a pattern"))
-    return checkstatus(repo, subset, pat, 1)
-
-@predicate('ancestor(*changeset)', safe=True)
-def ancestor(repo, subset, x):
-    """A greatest common ancestor of the changesets.
-
-    Accepts 0 or more changesets.
-    Will return empty list when passed no args.
-    Greatest common ancestor of a single changeset is that changeset.
-    """
-    # i18n: "ancestor" is a keyword
-    l = getlist(x)
-    rl = fullreposet(repo)
-    anc = None
-
-    # (getset(repo, rl, i) for i in l) generates a list of lists
-    for revs in (getset(repo, rl, i) for i in l):
-        for r in revs:
-            if anc is None:
-                anc = repo[r]
-            else:
-                anc = anc.ancestor(repo[r])
-
-    if anc is not None and anc.rev() in subset:
-        return baseset([anc.rev()])
-    return baseset()
-
-def _ancestors(repo, subset, x, followfirst=False):
-    heads = getset(repo, fullreposet(repo), x)
-    if not heads:
-        return baseset()
-    s = _revancestors(repo, heads, followfirst)
-    return subset & s
-
-@predicate('ancestors(set)', safe=True)
-def ancestors(repo, subset, x):
-    """Changesets that are ancestors of a changeset in set.
-    """
-    return _ancestors(repo, subset, x)
-
-@predicate('_firstancestors', safe=True)
-def _firstancestors(repo, subset, x):
-    # ``_firstancestors(set)``
-    # Like ``ancestors(set)`` but follows only the first parents.
-    return _ancestors(repo, subset, x, followfirst=True)
-
-def _childrenspec(repo, subset, x, n, order):
-    """Changesets that are the Nth child of a changeset
-    in set.
-    """
-    cs = set()
-    for r in getset(repo, fullreposet(repo), x):
-        for i in range(n):
-            c = repo[r].children()
-            if len(c) == 0:
-                break
-            if len(c) > 1:
-                raise error.RepoLookupError(
-                    _("revision in set has more than one child"))
-            r = c[0]
-        else:
-            cs.add(r)
-    return subset & cs
-
-def ancestorspec(repo, subset, x, n, order):
-    """``set~n``
-    Changesets that are the Nth ancestor (first parents only) of a changeset
-    in set.
-    """
-    n = getinteger(n, _("~ expects a number"))
-    if n < 0:
-        # children lookup
-        return _childrenspec(repo, subset, x, -n, order)
-    ps = set()
-    cl = repo.changelog
-    for r in getset(repo, fullreposet(repo), x):
-        for i in range(n):
-            try:
-                r = cl.parentrevs(r)[0]
-            except error.WdirUnsupported:
-                r = repo[r].parents()[0].rev()
-        ps.add(r)
-    return subset & ps
-
-@predicate('author(string)', safe=True)
-def author(repo, subset, x):
-    """Alias for ``user(string)``.
-    """
-    # i18n: "author" is a keyword
-    n = getstring(x, _("author requires a string"))
-    kind, pattern, matcher = _substringmatcher(n, casesensitive=False)
-    return subset.filter(lambda x: matcher(repo[x].user()),
-                         condrepr=('<user %r>', n))
-
-@predicate('bisect(string)', safe=True)
-def bisect(repo, subset, x):
-    """Changesets marked in the specified bisect status:
-
-    - ``good``, ``bad``, ``skip``: csets explicitly marked as good/bad/skip
-    - ``goods``, ``bads``      : csets topologically good/bad
-    - ``range``              : csets taking part in the bisection
-    - ``pruned``             : csets that are goods, bads or skipped
-    - ``untested``           : csets whose fate is yet unknown
-    - ``ignored``            : csets ignored due to DAG topology
-    - ``current``            : the cset currently being bisected
-    """
-    # i18n: "bisect" is a keyword
-    status = getstring(x, _("bisect requires a string")).lower()
-    state = set(hbisect.get(repo, status))
-    return subset & state
-
-# Backward-compatibility
-# - no help entry so that we do not advertise it any more
-@predicate('bisected', safe=True)
-def bisected(repo, subset, x):
-    return bisect(repo, subset, x)
-
-@predicate('bookmark([name])', safe=True)
-def bookmark(repo, subset, x):
-    """The named bookmark or all bookmarks.
-
-    Pattern matching is supported for `name`. See :hg:`help revisions.patterns`.
-    """
-    # i18n: "bookmark" is a keyword
-    args = getargs(x, 0, 1, _('bookmark takes one or no arguments'))
-    if args:
-        bm = getstring(args[0],
-                       # i18n: "bookmark" is a keyword
-                       _('the argument to bookmark must be a string'))
-        kind, pattern, matcher = util.stringmatcher(bm)
-        bms = set()
-        if kind == 'literal':
-            bmrev = repo._bookmarks.get(pattern, None)
-            if not bmrev:
-                raise error.RepoLookupError(_("bookmark '%s' does not exist")
-                                            % pattern)
-            bms.add(repo[bmrev].rev())
-        else:
-            matchrevs = set()
-            for name, bmrev in repo._bookmarks.iteritems():
-                if matcher(name):
-                    matchrevs.add(bmrev)
-            if not matchrevs:
-                raise error.RepoLookupError(_("no bookmarks exist"
-                                              " that match '%s'") % pattern)
-            for bmrev in matchrevs:
-                bms.add(repo[bmrev].rev())
-    else:
-        bms = {repo[r].rev() for r in repo._bookmarks.values()}
-    bms -= {node.nullrev}
-    return subset & bms
-
-@predicate('branch(string or set)', safe=True)
-def branch(repo, subset, x):
-    """
-    All changesets belonging to the given branch or the branches of the given
-    changesets.
-
-    Pattern matching is supported for `string`. See
-    :hg:`help revisions.patterns`.
-    """
-    getbi = repo.revbranchcache().branchinfo
-    def getbranch(r):
-        try:
-            return getbi(r)[0]
-        except error.WdirUnsupported:
-            return repo[r].branch()
-
-    try:
-        b = getstring(x, '')
-    except error.ParseError:
-        # not a string, but another revspec, e.g. tip()
-        pass
-    else:
-        kind, pattern, matcher = util.stringmatcher(b)
-        if kind == 'literal':
-            # note: falls through to the revspec case if no branch with
-            # this name exists and pattern kind is not specified explicitly
-            if pattern in repo.branchmap():
-                return subset.filter(lambda r: matcher(getbranch(r)),
-                                     condrepr=('<branch %r>', b))
-            if b.startswith('literal:'):
-                raise error.RepoLookupError(_("branch '%s' does not exist")
-                                            % pattern)
-        else:
-            return subset.filter(lambda r: matcher(getbranch(r)),
-                                 condrepr=('<branch %r>', b))
-
-    s = getset(repo, fullreposet(repo), x)
-    b = set()
-    for r in s:
-        b.add(getbranch(r))
-    c = s.__contains__
-    return subset.filter(lambda r: c(r) or getbranch(r) in b,
-                         condrepr=lambda: '<branch %r>' % sorted(b))
-
-@predicate('bumped()', safe=True)
-def bumped(repo, subset, x):
-    """Mutable changesets marked as successors of public changesets.
-
-    Only non-public and non-obsolete changesets can be `bumped`.
-    """
-    # i18n: "bumped" is a keyword
-    getargs(x, 0, 0, _("bumped takes no arguments"))
-    bumped = obsmod.getrevs(repo, 'bumped')
-    return subset & bumped
-
-@predicate('bundle()', safe=True)
-def bundle(repo, subset, x):
-    """Changesets in the bundle.
-
-    Bundle must be specified by the -R option."""
-
-    try:
-        bundlerevs = repo.changelog.bundlerevs
-    except AttributeError:
-        raise error.Abort(_("no bundle provided - specify with -R"))
-    return subset & bundlerevs
-
-def checkstatus(repo, subset, pat, field):
-    hasset = matchmod.patkind(pat) == 'set'
-
-    mcache = [None]
-    def matches(x):
-        c = repo[x]
-        if not mcache[0] or hasset:
-            mcache[0] = matchmod.match(repo.root, repo.getcwd(), [pat], ctx=c)
-        m = mcache[0]
-        fname = None
-        if not m.anypats() and len(m.files()) == 1:
-            fname = m.files()[0]
-        if fname is not None:
-            if fname not in c.files():
-                return False
-        else:
-            for f in c.files():
-                if m(f):
-                    break
-            else:
-                return False
-        files = repo.status(c.p1().node(), c.node())[field]
-        if fname is not None:
-            if fname in files:
-                return True
-        else:
-            for f in files:
-                if m(f):
-                    return True
-
-    return subset.filter(matches, condrepr=('<status[%r] %r>', field, pat))
-
-def _children(repo, subset, parentset):
-    if not parentset:
-        return baseset()
-    cs = set()
-    pr = repo.changelog.parentrevs
-    minrev = parentset.min()
-    nullrev = node.nullrev
-    for r in subset:
-        if r <= minrev:
-            continue
-        p1, p2 = pr(r)
-        if p1 in parentset:
-            cs.add(r)
-        if p2 != nullrev and p2 in parentset:
-            cs.add(r)
-    return baseset(cs)
-
-@predicate('children(set)', safe=True)
-def children(repo, subset, x):
-    """Child changesets of changesets in set.
-    """
-    s = getset(repo, fullreposet(repo), x)
-    cs = _children(repo, subset, s)
-    return subset & cs
-
-@predicate('closed()', safe=True)
-def closed(repo, subset, x):
-    """Changeset is closed.
-    """
-    # i18n: "closed" is a keyword
-    getargs(x, 0, 0, _("closed takes no arguments"))
-    return subset.filter(lambda r: repo[r].closesbranch(),
-                         condrepr='<branch closed>')
-
-@predicate('contains(pattern)')
-def contains(repo, subset, x):
-    """The revision's manifest contains a file matching pattern (but might not
-    modify it). See :hg:`help patterns` for information about file patterns.
-
-    The pattern without explicit kind like ``glob:`` is expected to be
-    relative to the current directory and match against a file exactly
-    for efficiency.
-    """
-    # i18n: "contains" is a keyword
-    pat = getstring(x, _("contains requires a pattern"))
-
-    def matches(x):
-        if not matchmod.patkind(pat):
-            pats = pathutil.canonpath(repo.root, repo.getcwd(), pat)
-            if pats in repo[x]:
-                return True
-        else:
-            c = repo[x]
-            m = matchmod.match(repo.root, repo.getcwd(), [pat], ctx=c)
-            for f in c.manifest():
-                if m(f):
-                    return True
-        return False
-
-    return subset.filter(matches, condrepr=('<contains %r>', pat))
-
-@predicate('converted([id])', safe=True)
-def converted(repo, subset, x):
-    """Changesets converted from the given identifier in the old repository if
-    present, or all converted changesets if no identifier is specified.
-    """
-
-    # There is exactly no chance of resolving the revision, so do a simple
-    # string compare and hope for the best
-
-    rev = None
-    # i18n: "converted" is a keyword
-    l = getargs(x, 0, 1, _('converted takes one or no arguments'))
-    if l:
-        # i18n: "converted" is a keyword
-        rev = getstring(l[0], _('converted requires a revision'))
-
-    def _matchvalue(r):
-        source = repo[r].extra().get('convert_revision', None)
-        return source is not None and (rev is None or source.startswith(rev))
-
-    return subset.filter(lambda r: _matchvalue(r),
-                         condrepr=('<converted %r>', rev))
-
-@predicate('date(interval)', safe=True)
-def date(repo, subset, x):
-    """Changesets within the interval, see :hg:`help dates`.
-    """
-    # i18n: "date" is a keyword
-    ds = getstring(x, _("date requires a string"))
-    dm = util.matchdate(ds)
-    return subset.filter(lambda x: dm(repo[x].date()[0]),
-                         condrepr=('<date %r>', ds))
-
-@predicate('desc(string)', safe=True)
-def desc(repo, subset, x):
-    """Search commit message for string. The match is case-insensitive.
-
-    Pattern matching is supported for `string`. See
-    :hg:`help revisions.patterns`.
-    """
-    # i18n: "desc" is a keyword
-    ds = getstring(x, _("desc requires a string"))
-
-    kind, pattern, matcher = _substringmatcher(ds, casesensitive=False)
-
-    return subset.filter(lambda r: matcher(repo[r].description()),
-                         condrepr=('<desc %r>', ds))
-
-def _descendants(repo, subset, x, followfirst=False):
-    roots = getset(repo, fullreposet(repo), x)
-    if not roots:
-        return baseset()
-    s = _revdescendants(repo, roots, followfirst)
-
-    # Both sets need to be ascending in order to lazily return the union
-    # in the correct order.
-    base = subset & roots
-    desc = subset & s
-    result = base + desc
-    if subset.isascending():
-        result.sort()
-    elif subset.isdescending():
-        result.sort(reverse=True)
-    else:
-        result = subset & result
-    return result
-
-@predicate('descendants(set)', safe=True)
-def descendants(repo, subset, x):
-    """Changesets which are descendants of changesets in set.
-    """
-    return _descendants(repo, subset, x)
-
-@predicate('_firstdescendants', safe=True)
-def _firstdescendants(repo, subset, x):
-    # ``_firstdescendants(set)``
-    # Like ``descendants(set)`` but follows only the first parents.
-    return _descendants(repo, subset, x, followfirst=True)
-
-@predicate('destination([set])', safe=True)
-def destination(repo, subset, x):
-    """Changesets that were created by a graft, transplant or rebase operation,
-    with the given revisions specified as the source.  Omitting the optional set
-    is the same as passing all().
-    """
-    if x is not None:
-        sources = getset(repo, fullreposet(repo), x)
-    else:
-        sources = fullreposet(repo)
-
-    dests = set()
-
-    # subset contains all of the possible destinations that can be returned, so
-    # iterate over them and see if their source(s) were provided in the arg set.
-    # Even if the immediate src of r is not in the arg set, src's source (or
-    # further back) may be.  Scanning back further than the immediate src allows
-    # transitive transplants and rebases to yield the same results as transitive
-    # grafts.
-    for r in subset:
-        src = _getrevsource(repo, r)
-        lineage = None
-
-        while src is not None:
-            if lineage is None:
-                lineage = list()
-
-            lineage.append(r)
-
-            # The visited lineage is a match if the current source is in the arg
-            # set.  Since every candidate dest is visited by way of iterating
-            # subset, any dests further back in the lineage will be tested by a
-            # different iteration over subset.  Likewise, if the src was already
-            # selected, the current lineage can be selected without going back
-            # further.
-            if src in sources or src in dests:
-                dests.update(lineage)
-                break
-
-            r = src
-            src = _getrevsource(repo, r)
-
-    return subset.filter(dests.__contains__,
-                         condrepr=lambda: '<destination %r>' % sorted(dests))
-
-@predicate('divergent()', safe=True)
-def divergent(repo, subset, x):
-    """
-    Final successors of changesets with an alternative set of final successors.
-    """
-    # i18n: "divergent" is a keyword
-    getargs(x, 0, 0, _("divergent takes no arguments"))
-    divergent = obsmod.getrevs(repo, 'divergent')
-    return subset & divergent
-
-@predicate('extinct()', safe=True)
-def extinct(repo, subset, x):
-    """Obsolete changesets with obsolete descendants only.
-    """
-    # i18n: "extinct" is a keyword
-    getargs(x, 0, 0, _("extinct takes no arguments"))
-    extincts = obsmod.getrevs(repo, 'extinct')
-    return subset & extincts
-
-@predicate('extra(label, [value])', safe=True)
-def extra(repo, subset, x):
-    """Changesets with the given label in the extra metadata, with the given
-    optional value.
-
-    Pattern matching is supported for `value`. See
-    :hg:`help revisions.patterns`.
-    """
-    args = getargsdict(x, 'extra', 'label value')
-    if 'label' not in args:
-        # i18n: "extra" is a keyword
-        raise error.ParseError(_('extra takes at least 1 argument'))
-    # i18n: "extra" is a keyword
-    label = getstring(args['label'], _('first argument to extra must be '
-                                       'a string'))
-    value = None
-
-    if 'value' in args:
-        # i18n: "extra" is a keyword
-        value = getstring(args['value'], _('second argument to extra must be '
-                                           'a string'))
-        kind, value, matcher = util.stringmatcher(value)
-
-    def _matchvalue(r):
-        extra = repo[r].extra()
-        return label in extra and (value is None or matcher(extra[label]))
-
-    return subset.filter(lambda r: _matchvalue(r),
-                         condrepr=('<extra[%r] %r>', label, value))
-
-@predicate('filelog(pattern)', safe=True)
-def filelog(repo, subset, x):
-    """Changesets connected to the specified filelog.
-
-    For performance reasons, visits only revisions mentioned in the file-level
-    filelog, rather than filtering through all changesets (much faster, but
-    doesn't include deletes or duplicate changes). For a slower, more accurate
-    result, use ``file()``.
-
-    The pattern without explicit kind like ``glob:`` is expected to be
-    relative to the current directory and match against a file exactly
-    for efficiency.
-
-    If some linkrev points to revisions filtered by the current repoview, we'll
-    work around it to return a non-filtered value.
-    """
-
-    # i18n: "filelog" is a keyword
-    pat = getstring(x, _("filelog requires a pattern"))
-    s = set()
-    cl = repo.changelog
-
-    if not matchmod.patkind(pat):
-        f = pathutil.canonpath(repo.root, repo.getcwd(), pat)
-        files = [f]
-    else:
-        m = matchmod.match(repo.root, repo.getcwd(), [pat], ctx=repo[None])
-        files = (f for f in repo[None] if m(f))
-
-    for f in files:
-        fl = repo.file(f)
-        known = {}
-        scanpos = 0
-        for fr in list(fl):
-            fn = fl.node(fr)
-            if fn in known:
-                s.add(known[fn])
-                continue
-
-            lr = fl.linkrev(fr)
-            if lr in cl:
-                s.add(lr)
-            elif scanpos is not None:
-                # lowest matching changeset is filtered, scan further
-                # ahead in changelog
-                start = max(lr, scanpos) + 1
-                scanpos = None
-                for r in cl.revs(start):
-                    # minimize parsing of non-matching entries
-                    if f in cl.revision(r) and f in cl.readfiles(r):
-                        try:
-                            # try to use manifest delta fastpath
-                            n = repo[r].filenode(f)
-                            if n not in known:
-                                if n == fn:
-                                    s.add(r)
-                                    scanpos = r
-                                    break
-                                else:
-                                    known[n] = r
-                        except error.ManifestLookupError:
-                            # deletion in changelog
-                            continue
-
-    return subset & s
-
-@predicate('first(set, [n])', safe=True, takeorder=True)
-def first(repo, subset, x, order):
-    """An alias for limit().
-    """
-    return limit(repo, subset, x, order)
-
-def _follow(repo, subset, x, name, followfirst=False):
-    l = getargs(x, 0, 2, _("%s takes no arguments or a pattern "
-                           "and an optional revset") % name)
-    c = repo['.']
-    if l:
-        x = getstring(l[0], _("%s expected a pattern") % name)
-        rev = None
-        if len(l) >= 2:
-            revs = getset(repo, fullreposet(repo), l[1])
-            if len(revs) != 1:
-                raise error.RepoLookupError(
-                        _("%s expected one starting revision") % name)
-            rev = revs.last()
-            c = repo[rev]
-        matcher = matchmod.match(repo.root, repo.getcwd(), [x],
-                                 ctx=repo[rev], default='path')
-
-        files = c.manifest().walk(matcher)
-
-        s = set()
-        for fname in files:
-            fctx = c[fname]
-            s = s.union(set(c.rev() for c in fctx.ancestors(followfirst)))
-            # include the revision responsible for the most recent version
-            s.add(fctx.introrev())
-    else:
-        s = _revancestors(repo, baseset([c.rev()]), followfirst)
-
-    return subset & s
-
-@predicate('follow([pattern[, startrev]])', safe=True)
-def follow(repo, subset, x):
-    """
-    An alias for ``::.`` (ancestors of the working directory's first parent).
-    If pattern is specified, the histories of files matching given
-    pattern in the revision given by startrev are followed, including copies.
-    """
-    return _follow(repo, subset, x, 'follow')
-
-@predicate('_followfirst', safe=True)
-def _followfirst(repo, subset, x):
-    # ``followfirst([pattern[, startrev]])``
-    # Like ``follow([pattern[, startrev]])`` but follows only the first parent
-    # of every revisions or files revisions.
-    return _follow(repo, subset, x, '_followfirst', followfirst=True)
-
-@predicate('followlines(file, fromline:toline[, startrev=., descend=False])',
-           safe=True)
-def followlines(repo, subset, x):
-    """Changesets modifying `file` in line range ('fromline', 'toline').
-
-    Line range corresponds to 'file' content at 'startrev' and should hence be
-    consistent with file size. If startrev is not specified, working directory's
-    parent is used.
-
-    By default, ancestors of 'startrev' are returned. If 'descend' is True,
-    descendants of 'startrev' are returned though renames are (currently) not
-    followed in this direction.
-    """
-    from . import context  # avoid circular import issues
-
-    args = getargsdict(x, 'followlines', 'file *lines startrev descend')
-    if len(args['lines']) != 1:
-        raise error.ParseError(_("followlines requires a line range"))
-
-    rev = '.'
-    if 'startrev' in args:
-        revs = getset(repo, fullreposet(repo), args['startrev'])
-        if len(revs) != 1:
-            raise error.ParseError(
-                # i18n: "followlines" is a keyword
-                _("followlines expects exactly one revision"))
-        rev = revs.last()
-
-    pat = getstring(args['file'], _("followlines requires a pattern"))
-    if not matchmod.patkind(pat):
-        fname = pathutil.canonpath(repo.root, repo.getcwd(), pat)
-    else:
-        m = matchmod.match(repo.root, repo.getcwd(), [pat], ctx=repo[rev])
-        files = [f for f in repo[rev] if m(f)]
-        if len(files) != 1:
-            # i18n: "followlines" is a keyword
-            raise error.ParseError(_("followlines expects exactly one file"))
-        fname = files[0]
-
-    # i18n: "followlines" is a keyword
-    lr = getrange(args['lines'][0], _("followlines expects a line range"))
-    fromline, toline = [getinteger(a, _("line range bounds must be integers"))
-                        for a in lr]
-    fromline, toline = util.processlinerange(fromline, toline)
-
-    fctx = repo[rev].filectx(fname)
-    descend = False
-    if 'descend' in args:
-        descend = getboolean(args['descend'],
-                             # i18n: "descend" is a keyword
-                             _("descend argument must be a boolean"))
-    if descend:
-        rs = generatorset(
-            (c.rev() for c, _linerange
-             in context.blockdescendants(fctx, fromline, toline)),
-            iterasc=True)
-    else:
-        rs = generatorset(
-            (c.rev() for c, _linerange
-             in context.blockancestors(fctx, fromline, toline)),
-            iterasc=False)
-    return subset & rs
-
-@predicate('all()', safe=True)
-def getall(repo, subset, x):
-    """All changesets, the same as ``0:tip``.
-    """
-    # i18n: "all" is a keyword
-    getargs(x, 0, 0, _("all takes no arguments"))
-    return subset & spanset(repo)  # drop "null" if any
-
-@predicate('grep(regex)')
-def grep(repo, subset, x):
-    """Like ``keyword(string)`` but accepts a regex. Use ``grep(r'...')``
-    to ensure special escape characters are handled correctly. Unlike
-    ``keyword(string)``, the match is case-sensitive.
-    """
-    try:
-        # i18n: "grep" is a keyword
-        gr = re.compile(getstring(x, _("grep requires a string")))
-    except re.error as e:
-        raise error.ParseError(_('invalid match pattern: %s') % e)
-
-    def matches(x):
-        c = repo[x]
-        for e in c.files() + [c.user(), c.description()]:
-            if gr.search(e):
-                return True
-        return False
-
-    return subset.filter(matches, condrepr=('<grep %r>', gr.pattern))
-
-@predicate('_matchfiles', safe=True)
-def _matchfiles(repo, subset, x):
-    # _matchfiles takes a revset list of prefixed arguments:
-    #
-    #   [p:foo, i:bar, x:baz]
-    #
-    # builds a match object from them and filters subset. Allowed
-    # prefixes are 'p:' for regular patterns, 'i:' for include
-    # patterns and 'x:' for exclude patterns. Use 'r:' prefix to pass
-    # a revision identifier, or the empty string to reference the
-    # working directory, from which the match object is
-    # initialized. Use 'd:' to set the default matching mode, default
-    # to 'glob'. At most one 'r:' and 'd:' argument can be passed.
-
-    l = getargs(x, 1, -1, "_matchfiles requires at least one argument")
-    pats, inc, exc = [], [], []
-    rev, default = None, None
-    for arg in l:
-        s = getstring(arg, "_matchfiles requires string arguments")
-        prefix, value = s[:2], s[2:]
-        if prefix == 'p:':
-            pats.append(value)
-        elif prefix == 'i:':
-            inc.append(value)
-        elif prefix == 'x:':
-            exc.append(value)
-        elif prefix == 'r:':
-            if rev is not None:
-                raise error.ParseError('_matchfiles expected at most one '
-                                       'revision')
-            if value != '': # empty means working directory; leave rev as None
-                rev = value
-        elif prefix == 'd:':
-            if default is not None:
-                raise error.ParseError('_matchfiles expected at most one '
-                                       'default mode')
-            default = value
-        else:
-            raise error.ParseError('invalid _matchfiles prefix: %s' % prefix)
-    if not default:
-        default = 'glob'
-
-    m = matchmod.match(repo.root, repo.getcwd(), pats, include=inc,
-                       exclude=exc, ctx=repo[rev], default=default)
-
-    # This directly read the changelog data as creating changectx for all
-    # revisions is quite expensive.
-    getfiles = repo.changelog.readfiles
-    wdirrev = node.wdirrev
-    def matches(x):
-        if x == wdirrev:
-            files = repo[x].files()
-        else:
-            files = getfiles(x)
-        for f in files:
-            if m(f):
-                return True
-        return False
-
-    return subset.filter(matches,
-                         condrepr=('<matchfiles patterns=%r, include=%r '
-                                   'exclude=%r, default=%r, rev=%r>',
-                                   pats, inc, exc, default, rev))
-
-@predicate('file(pattern)', safe=True)
-def hasfile(repo, subset, x):
-    """Changesets affecting files matched by pattern.
-
-    For a faster but less accurate result, consider using ``filelog()``
-    instead.
-
-    This predicate uses ``glob:`` as the default kind of pattern.
-    """
-    # i18n: "file" is a keyword
-    pat = getstring(x, _("file requires a pattern"))
-    return _matchfiles(repo, subset, ('string', 'p:' + pat))
-
-@predicate('head()', safe=True)
-def head(repo, subset, x):
-    """Changeset is a named branch head.
-    """
-    # i18n: "head" is a keyword
-    getargs(x, 0, 0, _("head takes no arguments"))
-    hs = set()
-    cl = repo.changelog
-    for ls in repo.branchmap().itervalues():
-        hs.update(cl.rev(h) for h in ls)
-    return subset & baseset(hs)
-
-@predicate('heads(set)', safe=True)
-def heads(repo, subset, x):
-    """Members of set with no children in set.
-    """
-    s = getset(repo, subset, x)
-    ps = parents(repo, subset, x)
-    return s - ps
-
-@predicate('hidden()', safe=True)
-def hidden(repo, subset, x):
-    """Hidden changesets.
-    """
-    # i18n: "hidden" is a keyword
-    getargs(x, 0, 0, _("hidden takes no arguments"))
-    hiddenrevs = repoview.filterrevs(repo, 'visible')
-    return subset & hiddenrevs
-
-@predicate('keyword(string)', safe=True)
-def keyword(repo, subset, x):
-    """Search commit message, user name, and names of changed files for
-    string. The match is case-insensitive.
-
-    For a regular expression or case sensitive search of these fields, use
-    ``grep(regex)``.
-    """
-    # i18n: "keyword" is a keyword
-    kw = encoding.lower(getstring(x, _("keyword requires a string")))
-
-    def matches(r):
-        c = repo[r]
-        return any(kw in encoding.lower(t)
-                   for t in c.files() + [c.user(), c.description()])
-
-    return subset.filter(matches, condrepr=('<keyword %r>', kw))
-
-@predicate('limit(set[, n[, offset]])', safe=True, takeorder=True)
-def limit(repo, subset, x, order):
-    """First n members of set, defaulting to 1, starting from offset.
-    """
-    args = getargsdict(x, 'limit', 'set n offset')
-    if 'set' not in args:
-        # i18n: "limit" is a keyword
-        raise error.ParseError(_("limit requires one to three arguments"))
-    # i18n: "limit" is a keyword
-    lim = getinteger(args.get('n'), _("limit expects a number"), default=1)
-    if lim < 0:
-        raise error.ParseError(_("negative number to select"))
-    # i18n: "limit" is a keyword
-    ofs = getinteger(args.get('offset'), _("limit expects a number"), default=0)
-    if ofs < 0:
-        raise error.ParseError(_("negative offset"))
-    os = getset(repo, fullreposet(repo), args['set'])
-    ls = os.slice(ofs, ofs + lim)
-    if order == followorder and lim > 1:
-        return subset & ls
-    return ls & subset
-
-@predicate('last(set, [n])', safe=True, takeorder=True)
-def last(repo, subset, x, order):
-    """Last n members of set, defaulting to 1.
-    """
-    # i18n: "last" is a keyword
-    l = getargs(x, 1, 2, _("last requires one or two arguments"))
-    lim = 1
-    if len(l) == 2:
-        # i18n: "last" is a keyword
-        lim = getinteger(l[1], _("last expects a number"))
-    if lim < 0:
-        raise error.ParseError(_("negative number to select"))
-    os = getset(repo, fullreposet(repo), l[0])
-    os.reverse()
-    ls = os.slice(0, lim)
-    if order == followorder and lim > 1:
-        return subset & ls
-    ls.reverse()
-    return ls & subset
-
-@predicate('max(set)', safe=True)
-def maxrev(repo, subset, x):
-    """Changeset with highest revision number in set.
-    """
-    os = getset(repo, fullreposet(repo), x)
-    try:
-        m = os.max()
-        if m in subset:
-            return baseset([m], datarepr=('<max %r, %r>', subset, os))
-    except ValueError:
-        # os.max() throws a ValueError when the collection is empty.
-        # Same as python's max().
-        pass
-    return baseset(datarepr=('<max %r, %r>', subset, os))
-
-@predicate('merge()', safe=True)
-def merge(repo, subset, x):
-    """Changeset is a merge changeset.
-    """
-    # i18n: "merge" is a keyword
-    getargs(x, 0, 0, _("merge takes no arguments"))
-    cl = repo.changelog
-    return subset.filter(lambda r: cl.parentrevs(r)[1] != -1,
-                         condrepr='<merge>')
-
-@predicate('branchpoint()', safe=True)
-def branchpoint(repo, subset, x):
-    """Changesets with more than one child.
-    """
-    # i18n: "branchpoint" is a keyword
-    getargs(x, 0, 0, _("branchpoint takes no arguments"))
-    cl = repo.changelog
-    if not subset:
-        return baseset()
-    # XXX this should be 'parentset.min()' assuming 'parentset' is a smartset
-    # (and if it is not, it should.)
-    baserev = min(subset)
-    parentscount = [0]*(len(repo) - baserev)
-    for r in cl.revs(start=baserev + 1):
-        for p in cl.parentrevs(r):
-            if p >= baserev:
-                parentscount[p - baserev] += 1
-    return subset.filter(lambda r: parentscount[r - baserev] > 1,
-                         condrepr='<branchpoint>')
-
-@predicate('min(set)', safe=True)
-def minrev(repo, subset, x):
-    """Changeset with lowest revision number in set.
-    """
-    os = getset(repo, fullreposet(repo), x)
-    try:
-        m = os.min()
-        if m in subset:
-            return baseset([m], datarepr=('<min %r, %r>', subset, os))
-    except ValueError:
-        # os.min() throws a ValueError when the collection is empty.
-        # Same as python's min().
-        pass
-    return baseset(datarepr=('<min %r, %r>', subset, os))
-
-@predicate('modifies(pattern)', safe=True)
-def modifies(repo, subset, x):
-    """Changesets modifying files matched by pattern.
-
-    The pattern without explicit kind like ``glob:`` is expected to be
-    relative to the current directory and match against a file or a
-    directory.
-    """
-    # i18n: "modifies" is a keyword
-    pat = getstring(x, _("modifies requires a pattern"))
-    return checkstatus(repo, subset, pat, 0)
-
-@predicate('named(namespace)')
-def named(repo, subset, x):
-    """The changesets in a given namespace.
-
-    Pattern matching is supported for `namespace`. See
-    :hg:`help revisions.patterns`.
-    """
-    # i18n: "named" is a keyword
-    args = getargs(x, 1, 1, _('named requires a namespace argument'))
-
-    ns = getstring(args[0],
-                   # i18n: "named" is a keyword
-                   _('the argument to named must be a string'))
-    kind, pattern, matcher = util.stringmatcher(ns)
-    namespaces = set()
-    if kind == 'literal':
-        if pattern not in repo.names:
-            raise error.RepoLookupError(_("namespace '%s' does not exist")
-                                        % ns)
-        namespaces.add(repo.names[pattern])
-    else:
-        for name, ns in repo.names.iteritems():
-            if matcher(name):
-                namespaces.add(ns)
-        if not namespaces:
-            raise error.RepoLookupError(_("no namespace exists"
-                                          " that match '%s'") % pattern)
-
-    names = set()
-    for ns in namespaces:
-        for name in ns.listnames(repo):
-            if name not in ns.deprecated:
-                names.update(repo[n].rev() for n in ns.nodes(repo, name))
-
-    names -= {node.nullrev}
-    return subset & names
-
-@predicate('id(string)', safe=True)
-def node_(repo, subset, x):
-    """Revision non-ambiguously specified by the given hex string prefix.
-    """
-    # i18n: "id" is a keyword
-    l = getargs(x, 1, 1, _("id requires one argument"))
-    # i18n: "id" is a keyword
-    n = getstring(l[0], _("id requires a string"))
-    if len(n) == 40:
-        try:
-            rn = repo.changelog.rev(node.bin(n))
-        except error.WdirUnsupported:
-            rn = node.wdirrev
-        except (LookupError, TypeError):
-            rn = None
-    else:
-        rn = None
-        try:
-            pm = repo.changelog._partialmatch(n)
-            if pm is not None:
-                rn = repo.changelog.rev(pm)
-        except error.WdirUnsupported:
-            rn = node.wdirrev
-
-    if rn is None:
-        return baseset()
-    result = baseset([rn])
-    return result & subset
-
-@predicate('obsolete()', safe=True)
-def obsolete(repo, subset, x):
-    """Mutable changeset with a newer version."""
-    # i18n: "obsolete" is a keyword
-    getargs(x, 0, 0, _("obsolete takes no arguments"))
-    obsoletes = obsmod.getrevs(repo, 'obsolete')
-    return subset & obsoletes
-
-@predicate('only(set, [set])', safe=True)
-def only(repo, subset, x):
-    """Changesets that are ancestors of the first set that are not ancestors
-    of any other head in the repo. If a second set is specified, the result
-    is ancestors of the first set that are not ancestors of the second set
-    (i.e. ::<set1> - ::<set2>).
-    """
-    cl = repo.changelog
-    # i18n: "only" is a keyword
-    args = getargs(x, 1, 2, _('only takes one or two arguments'))
-    include = getset(repo, fullreposet(repo), args[0])
-    if len(args) == 1:
-        if not include:
-            return baseset()
-
-        descendants = set(_revdescendants(repo, include, False))
-        exclude = [rev for rev in cl.headrevs()
-            if not rev in descendants and not rev in include]
-    else:
-        exclude = getset(repo, fullreposet(repo), args[1])
-
-    results = set(cl.findmissingrevs(common=exclude, heads=include))
-    # XXX we should turn this into a baseset instead of a set, smartset may do
-    # some optimizations from the fact this is a baseset.
-    return subset & results
-
-@predicate('origin([set])', safe=True)
-def origin(repo, subset, x):
-    """
-    Changesets that were specified as a source for the grafts, transplants or
-    rebases that created the given revisions.  Omitting the optional set is the
-    same as passing all().  If a changeset created by these operations is itself
-    specified as a source for one of these operations, only the source changeset
-    for the first operation is selected.
-    """
-    if x is not None:
-        dests = getset(repo, fullreposet(repo), x)
-    else:
-        dests = fullreposet(repo)
-
-    def _firstsrc(rev):
-        src = _getrevsource(repo, rev)
-        if src is None:
-            return None
-
-        while True:
-            prev = _getrevsource(repo, src)
-
-            if prev is None:
-                return src
-            src = prev
-
-    o = {_firstsrc(r) for r in dests}
-    o -= {None}
-    # XXX we should turn this into a baseset instead of a set, smartset may do
-    # some optimizations from the fact this is a baseset.
-    return subset & o
-
-@predicate('outgoing([path])', safe=False)
-def outgoing(repo, subset, x):
-    """Changesets not found in the specified destination repository, or the
-    default push location.
-    """
-    # Avoid cycles.
-    from . import (
-        discovery,
-        hg,
-    )
-    # i18n: "outgoing" is a keyword
-    l = getargs(x, 0, 1, _("outgoing takes one or no arguments"))
-    # i18n: "outgoing" is a keyword
-    dest = l and getstring(l[0], _("outgoing requires a repository path")) or ''
-    dest = repo.ui.expandpath(dest or 'default-push', dest or 'default')
-    dest, branches = hg.parseurl(dest)
-    revs, checkout = hg.addbranchrevs(repo, repo, branches, [])
-    if revs:
-        revs = [repo.lookup(rev) for rev in revs]
-    other = hg.peer(repo, {}, dest)
-    repo.ui.pushbuffer()
-    outgoing = discovery.findcommonoutgoing(repo, other, onlyheads=revs)
-    repo.ui.popbuffer()
-    cl = repo.changelog
-    o = {cl.rev(r) for r in outgoing.missing}
-    return subset & o
-
-@predicate('p1([set])', safe=True)
-def p1(repo, subset, x):
-    """First parent of changesets in set, or the working directory.
-    """
-    if x is None:
-        p = repo[x].p1().rev()
-        if p >= 0:
-            return subset & baseset([p])
-        return baseset()
-
-    ps = set()
-    cl = repo.changelog
-    for r in getset(repo, fullreposet(repo), x):
-        try:
-            ps.add(cl.parentrevs(r)[0])
-        except error.WdirUnsupported:
-            ps.add(repo[r].parents()[0].rev())
-    ps -= {node.nullrev}
-    # XXX we should turn this into a baseset instead of a set, smartset may do
-    # some optimizations from the fact this is a baseset.
-    return subset & ps
-
-@predicate('p2([set])', safe=True)
-def p2(repo, subset, x):
-    """Second parent of changesets in set, or the working directory.
-    """
-    if x is None:
-        ps = repo[x].parents()
-        try:
-            p = ps[1].rev()
-            if p >= 0:
-                return subset & baseset([p])
-            return baseset()
-        except IndexError:
-            return baseset()
-
-    ps = set()
-    cl = repo.changelog
-    for r in getset(repo, fullreposet(repo), x):
-        try:
-            ps.add(cl.parentrevs(r)[1])
-        except error.WdirUnsupported:
-            parents = repo[r].parents()
-            if len(parents) == 2:
-                ps.add(parents[1])
-    ps -= {node.nullrev}
-    # XXX we should turn this into a baseset instead of a set, smartset may do
-    # some optimizations from the fact this is a baseset.
-    return subset & ps
-
-def parentpost(repo, subset, x, order):
-    return p1(repo, subset, x)
-
-@predicate('parents([set])', safe=True)
-def parents(repo, subset, x):
-    """
-    The set of all parents for all changesets in set, or the working directory.
-    """
-    if x is None:
-        ps = set(p.rev() for p in repo[x].parents())
-    else:
-        ps = set()
-        cl = repo.changelog
-        up = ps.update
-        parentrevs = cl.parentrevs
-        for r in getset(repo, fullreposet(repo), x):
-            try:
-                up(parentrevs(r))
-            except error.WdirUnsupported:
-                up(p.rev() for p in repo[r].parents())
-    ps -= {node.nullrev}
-    return subset & ps
-
-def _phase(repo, subset, *targets):
-    """helper to select all rev in <targets> phases"""
-    s = repo._phasecache.getrevset(repo, targets)
-    return subset & s
-
-@predicate('draft()', safe=True)
-def draft(repo, subset, x):
-    """Changeset in draft phase."""
-    # i18n: "draft" is a keyword
-    getargs(x, 0, 0, _("draft takes no arguments"))
-    target = phases.draft
-    return _phase(repo, subset, target)
-
-@predicate('secret()', safe=True)
-def secret(repo, subset, x):
-    """Changeset in secret phase."""
-    # i18n: "secret" is a keyword
-    getargs(x, 0, 0, _("secret takes no arguments"))
-    target = phases.secret
-    return _phase(repo, subset, target)
-
-def parentspec(repo, subset, x, n, order):
-    """``set^0``
-    The set.
-    ``set^1`` (or ``set^``), ``set^2``
-    First or second parent, respectively, of all changesets in set.
-    """
-    try:
-        n = int(n[1])
-        if n not in (0, 1, 2):
-            raise ValueError
-    except (TypeError, ValueError):
-        raise error.ParseError(_("^ expects a number 0, 1, or 2"))
-    ps = set()
-    cl = repo.changelog
-    for r in getset(repo, fullreposet(repo), x):
-        if n == 0:
-            ps.add(r)
-        elif n == 1:
-            try:
-                ps.add(cl.parentrevs(r)[0])
-            except error.WdirUnsupported:
-                ps.add(repo[r].parents()[0].rev())
-        else:
-            try:
-                parents = cl.parentrevs(r)
-                if parents[1] != node.nullrev:
-                    ps.add(parents[1])
-            except error.WdirUnsupported:
-                parents = repo[r].parents()
-                if len(parents) == 2:
-                    ps.add(parents[1].rev())
-    return subset & ps
-
-@predicate('present(set)', safe=True)
-def present(repo, subset, x):
-    """An empty set, if any revision in set isn't found; otherwise,
-    all revisions in set.
-
-    If any of specified revisions is not present in the local repository,
-    the query is normally aborted. But this predicate allows the query
-    to continue even in such cases.
-    """
-    try:
-        return getset(repo, subset, x)
-    except error.RepoLookupError:
-        return baseset()
-
-# for internal use
-@predicate('_notpublic', safe=True)
-def _notpublic(repo, subset, x):
-    getargs(x, 0, 0, "_notpublic takes no arguments")
-    return _phase(repo, subset, phases.draft, phases.secret)
-
-@predicate('public()', safe=True)
-def public(repo, subset, x):
-    """Changeset in public phase."""
-    # i18n: "public" is a keyword
-    getargs(x, 0, 0, _("public takes no arguments"))
-    phase = repo._phasecache.phase
-    target = phases.public
-    condition = lambda r: phase(repo, r) == target
-    return subset.filter(condition, condrepr=('<phase %r>', target),
-                         cache=False)
-
-@predicate('remote([id [,path]])', safe=False)
-def remote(repo, subset, x):
-    """Local revision that corresponds to the given identifier in a
-    remote repository, if present. Here, the '.' identifier is a
-    synonym for the current local branch.
-    """
-
-    from . import hg # avoid start-up nasties
-    # i18n: "remote" is a keyword
-    l = getargs(x, 0, 2, _("remote takes zero, one, or two arguments"))
-
-    q = '.'
-    if len(l) > 0:
-    # i18n: "remote" is a keyword
-        q = getstring(l[0], _("remote requires a string id"))
-    if q == '.':
-        q = repo['.'].branch()
-
-    dest = ''
-    if len(l) > 1:
-        # i18n: "remote" is a keyword
-        dest = getstring(l[1], _("remote requires a repository path"))
-    dest = repo.ui.expandpath(dest or 'default')
-    dest, branches = hg.parseurl(dest)
-    revs, checkout = hg.addbranchrevs(repo, repo, branches, [])
-    if revs:
-        revs = [repo.lookup(rev) for rev in revs]
-    other = hg.peer(repo, {}, dest)
-    n = other.lookup(q)
-    if n in repo:
-        r = repo[n].rev()
-        if r in subset:
-            return baseset([r])
-    return baseset()
-
-@predicate('removes(pattern)', safe=True)
-def removes(repo, subset, x):
-    """Changesets which remove files matching pattern.
-
-    The pattern without explicit kind like ``glob:`` is expected to be
-    relative to the current directory and match against a file or a
-    directory.
-    """
-    # i18n: "removes" is a keyword
-    pat = getstring(x, _("removes requires a pattern"))
-    return checkstatus(repo, subset, pat, 2)
-
-@predicate('rev(number)', safe=True)
-def rev(repo, subset, x):
-    """Revision with the given numeric identifier.
-    """
-    # i18n: "rev" is a keyword
-    l = getargs(x, 1, 1, _("rev requires one argument"))
-    try:
-        # i18n: "rev" is a keyword
-        l = int(getstring(l[0], _("rev requires a number")))
-    except (TypeError, ValueError):
-        # i18n: "rev" is a keyword
-        raise error.ParseError(_("rev expects a number"))
-    if l not in repo.changelog and l not in (node.nullrev, node.wdirrev):
-        return baseset()
-    return subset & baseset([l])
-
-@predicate('matching(revision [, field])', safe=True)
-def matching(repo, subset, x):
-    """Changesets in which a given set of fields match the set of fields in the
-    selected revision or set.
-
-    To match more than one field pass the list of fields to match separated
-    by spaces (e.g. ``author description``).
-
-    Valid fields are most regular revision fields and some special fields.
-
-    Regular revision fields are ``description``, ``author``, ``branch``,
-    ``date``, ``files``, ``phase``, ``parents``, ``substate``, ``user``
-    and ``diff``.
-    Note that ``author`` and ``user`` are synonyms. ``diff`` refers to the
-    contents of the revision. Two revisions matching their ``diff`` will
-    also match their ``files``.
-
-    Special fields are ``summary`` and ``metadata``:
-    ``summary`` matches the first line of the description.
-    ``metadata`` is equivalent to matching ``description user date``
-    (i.e. it matches the main metadata fields).
-
-    ``metadata`` is the default field which is used when no fields are
-    specified. You can match more than one field at a time.
-    """
-    # i18n: "matching" is a keyword
-    l = getargs(x, 1, 2, _("matching takes 1 or 2 arguments"))
-
-    revs = getset(repo, fullreposet(repo), l[0])
-
-    fieldlist = ['metadata']
-    if len(l) > 1:
-            fieldlist = getstring(l[1],
-                # i18n: "matching" is a keyword
-                _("matching requires a string "
-                "as its second argument")).split()
-
-    # Make sure that there are no repeated fields,
-    # expand the 'special' 'metadata' field type
-    # and check the 'files' whenever we check the 'diff'
-    fields = []
-    for field in fieldlist:
-        if field == 'metadata':
-            fields += ['user', 'description', 'date']
-        elif field == 'diff':
-            # a revision matching the diff must also match the files
-            # since matching the diff is very costly, make sure to
-            # also match the files first
-            fields += ['files', 'diff']
-        else:
-            if field == 'author':
-                field = 'user'
-            fields.append(field)
-    fields = set(fields)
-    if 'summary' in fields and 'description' in fields:
-        # If a revision matches its description it also matches its summary
-        fields.discard('summary')
-
-    # We may want to match more than one field
-    # Not all fields take the same amount of time to be matched
-    # Sort the selected fields in order of increasing matching cost
-    fieldorder = ['phase', 'parents', 'user', 'date', 'branch', 'summary',
-        'files', 'description', 'substate', 'diff']
-    def fieldkeyfunc(f):
-        try:
-            return fieldorder.index(f)
-        except ValueError:
-            # assume an unknown field is very costly
-            return len(fieldorder)
-    fields = list(fields)
-    fields.sort(key=fieldkeyfunc)
-
-    # Each field will be matched with its own "getfield" function
-    # which will be added to the getfieldfuncs array of functions
-    getfieldfuncs = []
-    _funcs = {
-        'user': lambda r: repo[r].user(),
-        'branch': lambda r: repo[r].branch(),
-        'date': lambda r: repo[r].date(),
-        'description': lambda r: repo[r].description(),
-        'files': lambda r: repo[r].files(),
-        'parents': lambda r: repo[r].parents(),
-        'phase': lambda r: repo[r].phase(),
-        'substate': lambda r: repo[r].substate,
-        'summary': lambda r: repo[r].description().splitlines()[0],
-        'diff': lambda r: list(repo[r].diff(git=True),)
-    }
-    for info in fields:
-        getfield = _funcs.get(info, None)
-        if getfield is None:
-            raise error.ParseError(
-                # i18n: "matching" is a keyword
-                _("unexpected field name passed to matching: %s") % info)
-        getfieldfuncs.append(getfield)
-    # convert the getfield array of functions into a "getinfo" function
-    # which returns an array of field values (or a single value if there
-    # is only one field to match)
-    getinfo = lambda r: [f(r) for f in getfieldfuncs]
-
-    def matches(x):
-        for rev in revs:
-            target = getinfo(rev)
-            match = True
-            for n, f in enumerate(getfieldfuncs):
-                if target[n] != f(x):
-                    match = False
-            if match:
-                return True
-        return False
-
-    return subset.filter(matches, condrepr=('<matching%r %r>', fields, revs))
-
-@predicate('reverse(set)', safe=True, takeorder=True)
-def reverse(repo, subset, x, order):
-    """Reverse order of set.
-    """
-    l = getset(repo, subset, x)
-    if order == defineorder:
-        l.reverse()
-    return l
-
-@predicate('roots(set)', safe=True)
-def roots(repo, subset, x):
-    """Changesets in set with no parent changeset in set.
-    """
-    s = getset(repo, fullreposet(repo), x)
-    parents = repo.changelog.parentrevs
-    def filter(r):
-        for p in parents(r):
-            if 0 <= p and p in s:
-                return False
-        return True
-    return subset & s.filter(filter, condrepr='<roots>')
-
-_sortkeyfuncs = {
-    'rev': lambda c: c.rev(),
-    'branch': lambda c: c.branch(),
-    'desc': lambda c: c.description(),
-    'user': lambda c: c.user(),
-    'author': lambda c: c.user(),
-    'date': lambda c: c.date()[0],
-}
-
-def _getsortargs(x):
-    """Parse sort options into (set, [(key, reverse)], opts)"""
-    args = getargsdict(x, 'sort', 'set keys topo.firstbranch')
-    if 'set' not in args:
-        # i18n: "sort" is a keyword
-        raise error.ParseError(_('sort requires one or two arguments'))
-    keys = "rev"
-    if 'keys' in args:
-        # i18n: "sort" is a keyword
-        keys = getstring(args['keys'], _("sort spec must be a string"))
-
-    keyflags = []
-    for k in keys.split():
-        fk = k
-        reverse = (k[0] == '-')
-        if reverse:
-            k = k[1:]
-        if k not in _sortkeyfuncs and k != 'topo':
-            raise error.ParseError(_("unknown sort key %r") % fk)
-        keyflags.append((k, reverse))
-
-    if len(keyflags) > 1 and any(k == 'topo' for k, reverse in keyflags):
-        # i18n: "topo" is a keyword
-        raise error.ParseError(_('topo sort order cannot be combined '
-                                 'with other sort keys'))
-
-    opts = {}
-    if 'topo.firstbranch' in args:
-        if any(k == 'topo' for k, reverse in keyflags):
-            opts['topo.firstbranch'] = args['topo.firstbranch']
-        else:
-            # i18n: "topo" and "topo.firstbranch" are keywords
-            raise error.ParseError(_('topo.firstbranch can only be used '
-                                     'when using the topo sort key'))
-
-    return args['set'], keyflags, opts
-
-@predicate('sort(set[, [-]key... [, ...]])', safe=True, takeorder=True)
-def sort(repo, subset, x, order):
-    """Sort set by keys. The default sort order is ascending, specify a key
-    as ``-key`` to sort in descending order.
-
-    The keys can be:
-
-    - ``rev`` for the revision number,
-    - ``branch`` for the branch name,
-    - ``desc`` for the commit message (description),
-    - ``user`` for user name (``author`` can be used as an alias),
-    - ``date`` for the commit date
-    - ``topo`` for a reverse topographical sort
-
-    The ``topo`` sort order cannot be combined with other sort keys. This sort
-    takes one optional argument, ``topo.firstbranch``, which takes a revset that
-    specifies what topographical branches to prioritize in the sort.
-
-    """
-    s, keyflags, opts = _getsortargs(x)
-    revs = getset(repo, subset, s)
-
-    if not keyflags or order != defineorder:
-        return revs
-    if len(keyflags) == 1 and keyflags[0][0] == "rev":
-        revs.sort(reverse=keyflags[0][1])
-        return revs
-    elif keyflags[0][0] == "topo":
-        firstbranch = ()
-        if 'topo.firstbranch' in opts:
-            firstbranch = getset(repo, subset, opts['topo.firstbranch'])
-        revs = baseset(_toposort(revs, repo.changelog.parentrevs, firstbranch),
-                       istopo=True)
-        if keyflags[0][1]:
-            revs.reverse()
-        return revs
-
-    # sort() is guaranteed to be stable
-    ctxs = [repo[r] for r in revs]
-    for k, reverse in reversed(keyflags):
-        ctxs.sort(key=_sortkeyfuncs[k], reverse=reverse)
-    return baseset([c.rev() for c in ctxs])
-
-def _toposort(revs, parentsfunc, firstbranch=()):
+def toposort(revs, parentsfunc, firstbranch=()):
     """Yield revisions from heads to roots one (topo) branch at a time.
 
     This function aims to be used by a graph generator that wishes to minimize
@@ -2066,274 +337,3 @@  def _toposort(revs, parentsfunc, firstbr
     for g in groups:
         for r in g[0]:
             yield r
-
-@predicate('subrepo([pattern])')
-def subrepo(repo, subset, x):
-    """Changesets that add, modify or remove the given subrepo.  If no subrepo
-    pattern is named, any subrepo changes are returned.
-    """
-    # i18n: "subrepo" is a keyword
-    args = getargs(x, 0, 1, _('subrepo takes at most one argument'))
-    pat = None
-    if len(args) != 0:
-        pat = getstring(args[0], _("subrepo requires a pattern"))
-
-    m = matchmod.exact(repo.root, repo.root, ['.hgsubstate'])
-
-    def submatches(names):
-        k, p, m = util.stringmatcher(pat)
-        for name in names:
-            if m(name):
-                yield name
-
-    def matches(x):
-        c = repo[x]
-        s = repo.status(c.p1().node(), c.node(), match=m)
-
-        if pat is None:
-            return s.added or s.modified or s.removed
-
-        if s.added:
-            return any(submatches(c.substate.keys()))
-
-        if s.modified:
-            subs = set(c.p1().substate.keys())
-            subs.update(c.substate.keys())
-
-            for path in submatches(subs):
-                if c.p1().substate.get(path) != c.substate.get(path):
-                    return True
-
-        if s.removed:
-            return any(submatches(c.p1().substate.keys()))
-
-        return False
-
-    return subset.filter(matches, condrepr=('<subrepo %r>', pat))
-
-def _substringmatcher(pattern, casesensitive=True):
-    kind, pattern, matcher = util.stringmatcher(pattern,
-                                                casesensitive=casesensitive)
-    if kind == 'literal':
-        if not casesensitive:
-            pattern = encoding.lower(pattern)
-            matcher = lambda s: pattern in encoding.lower(s)
-        else:
-            matcher = lambda s: pattern in s
-    return kind, pattern, matcher
-
-@predicate('tag([name])', safe=True)
-def tag(repo, subset, x):
-    """The specified tag by name, or all tagged revisions if no name is given.
-
-    Pattern matching is supported for `name`. See
-    :hg:`help revisions.patterns`.
-    """
-    # i18n: "tag" is a keyword
-    args = getargs(x, 0, 1, _("tag takes one or no arguments"))
-    cl = repo.changelog
-    if args:
-        pattern = getstring(args[0],
-                            # i18n: "tag" is a keyword
-                            _('the argument to tag must be a string'))
-        kind, pattern, matcher = util.stringmatcher(pattern)
-        if kind == 'literal':
-            # avoid resolving all tags
-            tn = repo._tagscache.tags.get(pattern, None)
-            if tn is None:
-                raise error.RepoLookupError(_("tag '%s' does not exist")
-                                            % pattern)
-            s = {repo[tn].rev()}
-        else:
-            s = {cl.rev(n) for t, n in repo.tagslist() if matcher(t)}
-    else:
-        s = {cl.rev(n) for t, n in repo.tagslist() if t != 'tip'}
-    return subset & s
-
-@predicate('tagged', safe=True)
-def tagged(repo, subset, x):
-    return tag(repo, subset, x)
-
-@predicate('unstable()', safe=True)
-def unstable(repo, subset, x):
-    """Non-obsolete changesets with obsolete ancestors.
-    """
-    # i18n: "unstable" is a keyword
-    getargs(x, 0, 0, _("unstable takes no arguments"))
-    unstables = obsmod.getrevs(repo, 'unstable')
-    return subset & unstables
-
-
-@predicate('user(string)', safe=True)
-def user(repo, subset, x):
-    """User name contains string. The match is case-insensitive.
-
-    Pattern matching is supported for `string`. See
-    :hg:`help revisions.patterns`.
-    """
-    return author(repo, subset, x)
-
-@predicate('wdir()', safe=True)
-def wdir(repo, subset, x):
-    """Working directory. (EXPERIMENTAL)"""
-    # i18n: "wdir" is a keyword
-    getargs(x, 0, 0, _("wdir takes no arguments"))
-    if node.wdirrev in subset or isinstance(subset, fullreposet):
-        return baseset([node.wdirrev])
-    return baseset()
-
-def _orderedlist(repo, subset, x):
-    s = getstring(x, "internal error")
-    if not s:
-        return baseset()
-    # remove duplicates here. it's difficult for caller to deduplicate sets
-    # because different symbols can point to the same rev.
-    cl = repo.changelog
-    ls = []
-    seen = set()
-    for t in s.split('\0'):
-        try:
-            # fast path for integer revision
-            r = int(t)
-            if str(r) != t or r not in cl:
-                raise ValueError
-            revs = [r]
-        except ValueError:
-            revs = stringset(repo, subset, t)
-
-        for r in revs:
-            if r in seen:
-                continue
-            if (r in subset
-                or r == node.nullrev and isinstance(subset, fullreposet)):
-                ls.append(r)
-            seen.add(r)
-    return baseset(ls)
-
-# for internal use
-@predicate('_list', safe=True, takeorder=True)
-def _list(repo, subset, x, order):
-    if order == followorder:
-        # slow path to take the subset order
-        return subset & _orderedlist(repo, fullreposet(repo), x)
-    else:
-        return _orderedlist(repo, subset, x)
-
-def _orderedintlist(repo, subset, x):
-    s = getstring(x, "internal error")
-    if not s:
-        return baseset()
-    ls = [int(r) for r in s.split('\0')]
-    s = subset
-    return baseset([r for r in ls if r in s])
-
-# for internal use
-@predicate('_intlist', safe=True, takeorder=True)
-def _intlist(repo, subset, x, order):
-    if order == followorder:
-        # slow path to take the subset order
-        return subset & _orderedintlist(repo, fullreposet(repo), x)
-    else:
-        return _orderedintlist(repo, subset, x)
-
-def _orderedhexlist(repo, subset, x):
-    s = getstring(x, "internal error")
-    if not s:
-        return baseset()
-    cl = repo.changelog
-    ls = [cl.rev(node.bin(r)) for r in s.split('\0')]
-    s = subset
-    return baseset([r for r in ls if r in s])
-
-# for internal use
-@predicate('_hexlist', safe=True, takeorder=True)
-def _hexlist(repo, subset, x, order):
-    if order == followorder:
-        # slow path to take the subset order
-        return subset & _orderedhexlist(repo, fullreposet(repo), x)
-    else:
-        return _orderedhexlist(repo, subset, x)
-
-methods = {
-    "range": rangeset,
-    "rangeall": rangeall,
-    "rangepre": rangepre,
-    "rangepost": rangepost,
-    "dagrange": dagrange,
-    "string": stringset,
-    "symbol": stringset,
-    "and": andset,
-    "or": orset,
-    "not": notset,
-    "difference": differenceset,
-    "list": listset,
-    "keyvalue": keyvaluepair,
-    "func": func,
-    "ancestor": ancestorspec,
-    "parent": parentspec,
-    "parentpost": parentpost,
-}
-
-def posttreebuilthook(tree, repo):
-    # hook for extensions to execute code on the optimized tree
-    pass
-
-def match(ui, spec, repo=None, order=defineorder):
-    """Create a matcher for a single revision spec
-
-    If order=followorder, a matcher takes the ordering specified by the input
-    set.
-    """
-    return matchany(ui, [spec], repo=repo, order=order)
-
-def matchany(ui, specs, repo=None, order=defineorder):
-    """Create a matcher that will include any revisions matching one of the
-    given specs
-
-    If order=followorder, a matcher takes the ordering specified by the input
-    set.
-    """
-    if not specs:
-        def mfunc(repo, subset=None):
-            return baseset()
-        return mfunc
-    if not all(specs):
-        raise error.ParseError(_("empty query"))
-    lookup = None
-    if repo:
-        lookup = repo.__contains__
-    if len(specs) == 1:
-        tree = revsetlang.parse(specs[0], lookup)
-    else:
-        tree = ('or',
-                ('list',) + tuple(revsetlang.parse(s, lookup) for s in specs))
-
-    if ui:
-        tree = revsetlang.expandaliases(ui, tree)
-    tree = revsetlang.foldconcat(tree)
-    tree = revsetlang.analyze(tree, order)
-    tree = revsetlang.optimize(tree)
-    posttreebuilthook(tree, repo)
-    return makematcher(tree)
-
-def makematcher(tree):
-    """Create a matcher from an evaluatable tree"""
-    def mfunc(repo, subset=None):
-        if subset is None:
-            subset = fullreposet(repo)
-        return getset(repo, subset, tree)
-    return mfunc
-
-def loadpredicate(ui, extname, registrarobj):
-    """Load revset predicates from specified registrarobj
-    """
-    for name, func in registrarobj._table.iteritems():
-        symbols[name] = func
-        if func._safe:
-            safesymbols.add(name)
-
-# load built-in predicates explicitly to setup safesymbols
-loadpredicate(None, None, predicate)
-
-# tell hggettext to extract docstrings from these functions:
-i18nfunctions = symbols.values()
diff --git a/mercurial/graphmod.py b/mercurial/graphmod.py
--- a/mercurial/graphmod.py
+++ b/mercurial/graphmod.py
@@ -21,7 +21,7 @@  from __future__ import absolute_import
 
 from .node import nullrev
 from . import (
-    revset,
+    dagop,
     smartset,
     util,
 )
@@ -70,7 +70,7 @@  def dagwalker(repo, revs):
                 # through all revs (issue4782)
                 if not isinstance(revs, smartset.baseset):
                     revs = smartset.baseset(revs)
-                gp = gpcache[mpar] = sorted(set(revset.reachableroots(
+                gp = gpcache[mpar] = sorted(set(dagop.reachableroots(
                     repo, revs, [mpar])))
             if not gp:
                 parents.append((MISSINGPARENT, mpar))
diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -7,11 +7,11 @@ 
 
 from __future__ import absolute_import
 
-import heapq
 import re
 
 from .i18n import _
 from . import (
+    dagop,
     destutil,
     encoding,
     error,
@@ -49,128 +49,6 @@  generatorset = smartset.generatorset
 spanset = smartset.spanset
 fullreposet = smartset.fullreposet
 
-def _revancestors(repo, revs, followfirst):
-    """Like revlog.ancestors(), but supports followfirst."""
-    if followfirst:
-        cut = 1
-    else:
-        cut = None
-    cl = repo.changelog
-
-    def iterate():
-        revs.sort(reverse=True)
-        irevs = iter(revs)
-        h = []
-
-        inputrev = next(irevs, None)
-        if inputrev is not None:
-            heapq.heappush(h, -inputrev)
-
-        seen = set()
-        while h:
-            current = -heapq.heappop(h)
-            if current == inputrev:
-                inputrev = next(irevs, None)
-                if inputrev is not None:
-                    heapq.heappush(h, -inputrev)
-            if current not in seen:
-                seen.add(current)
-                yield current
-                try:
-                    for parent in cl.parentrevs(current)[:cut]:
-                        if parent != node.nullrev:
-                            heapq.heappush(h, -parent)
-                except error.WdirUnsupported:
-                    for parent in repo[current].parents()[:cut]:
-                        if parent.rev() != node.nullrev:
-                            heapq.heappush(h, -parent.rev())
-
-    return generatorset(iterate(), iterasc=False)
-
-def _revdescendants(repo, revs, followfirst):
-    """Like revlog.descendants() but supports followfirst."""
-    if followfirst:
-        cut = 1
-    else:
-        cut = None
-
-    def iterate():
-        cl = repo.changelog
-        # XXX this should be 'parentset.min()' assuming 'parentset' is a
-        # smartset (and if it is not, it should.)
-        first = min(revs)
-        nullrev = node.nullrev
-        if first == nullrev:
-            # Are there nodes with a null first parent and a non-null
-            # second one? Maybe. Do we care? Probably not.
-            for i in cl:
-                yield i
-        else:
-            seen = set(revs)
-            for i in cl.revs(first + 1):
-                for x in cl.parentrevs(i)[:cut]:
-                    if x != nullrev and x in seen:
-                        seen.add(i)
-                        yield i
-                        break
-
-    return generatorset(iterate(), iterasc=True)
-
-def _reachablerootspure(repo, minroot, roots, heads, includepath):
-    """return (heads(::<roots> and ::<heads>))
-
-    If includepath is True, return (<roots>::<heads>)."""
-    if not roots:
-        return []
-    parentrevs = repo.changelog.parentrevs
-    roots = set(roots)
-    visit = list(heads)
-    reachable = set()
-    seen = {}
-    # prefetch all the things! (because python is slow)
-    reached = reachable.add
-    dovisit = visit.append
-    nextvisit = visit.pop
-    # open-code the post-order traversal due to the tiny size of
-    # sys.getrecursionlimit()
-    while visit:
-        rev = nextvisit()
-        if rev in roots:
-            reached(rev)
-            if not includepath:
-                continue
-        parents = parentrevs(rev)
-        seen[rev] = parents
-        for parent in parents:
-            if parent >= minroot and parent not in seen:
-                dovisit(parent)
-    if not reachable:
-        return baseset()
-    if not includepath:
-        return reachable
-    for rev in sorted(seen):
-        for parent in seen[rev]:
-            if parent in reachable:
-                reached(rev)
-    return reachable
-
-def reachableroots(repo, roots, heads, includepath=False):
-    """return (heads(::<roots> and ::<heads>))
-
-    If includepath is True, return (<roots>::<heads>)."""
-    if not roots:
-        return baseset()
-    minroot = roots.min()
-    roots = list(roots)
-    heads = list(heads)
-    try:
-        revs = repo.changelog.reachableroots(minroot, heads, roots, includepath)
-    except AttributeError:
-        revs = _reachablerootspure(repo, minroot, roots, heads, includepath)
-    revs = baseset(revs)
-    revs.sort()
-    return revs
-
 # helpers
 
 def getset(repo, subset, x):
@@ -242,8 +120,8 @@  def _makerangeset(repo, subset, m, n, or
 
 def dagrange(repo, subset, x, y, order):
     r = fullreposet(repo)
-    xs = reachableroots(repo, getset(repo, r, x), getset(repo, r, y),
-                         includepath=True)
+    xs = dagop.reachableroots(repo, getset(repo, r, x), getset(repo, r, y),
+                              includepath=True)
     return subset & xs
 
 def andset(repo, subset, x, y, order):
@@ -364,7 +242,7 @@  def _ancestors(repo, subset, x, followfi
     heads = getset(repo, fullreposet(repo), x)
     if not heads:
         return baseset()
-    s = _revancestors(repo, heads, followfirst)
+    s = dagop.revancestors(repo, heads, followfirst)
     return subset & s
 
 @predicate('ancestors(set)', safe=True)
@@ -697,7 +575,7 @@  def _descendants(repo, subset, x, follow
     roots = getset(repo, fullreposet(repo), x)
     if not roots:
         return baseset()
-    s = _revdescendants(repo, roots, followfirst)
+    s = dagop.revdescendants(repo, roots, followfirst)
 
     # Both sets need to be ascending in order to lazily return the union
     # in the correct order.
@@ -916,7 +794,7 @@  def _follow(repo, subset, x, name, follo
             # include the revision responsible for the most recent version
             s.add(fctx.introrev())
     else:
-        s = _revancestors(repo, baseset([c.rev()]), followfirst)
+        s = dagop.revancestors(repo, baseset([c.rev()]), followfirst)
 
     return subset & s
 
@@ -1355,7 +1233,7 @@  def only(repo, subset, x):
         if not include:
             return baseset()
 
-        descendants = set(_revdescendants(repo, include, False))
+        descendants = set(dagop.revdescendants(repo, include, False))
         exclude = [rev for rev in cl.headrevs()
             if not rev in descendants and not rev in include]
     else:
@@ -1857,7 +1735,8 @@  def sort(repo, subset, x, order):
         firstbranch = ()
         if 'topo.firstbranch' in opts:
             firstbranch = getset(repo, subset, opts['topo.firstbranch'])
-        revs = baseset(_toposort(revs, repo.changelog.parentrevs, firstbranch),
+        revs = baseset(dagop.toposort(revs, repo.changelog.parentrevs,
+                                      firstbranch),
                        istopo=True)
         if keyflags[0][1]:
             revs.reverse()
@@ -1869,204 +1748,6 @@  def sort(repo, subset, x, order):
         ctxs.sort(key=_sortkeyfuncs[k], reverse=reverse)
     return baseset([c.rev() for c in ctxs])
 
-def _toposort(revs, parentsfunc, firstbranch=()):
-    """Yield revisions from heads to roots one (topo) branch at a time.
-
-    This function aims to be used by a graph generator that wishes to minimize
-    the number of parallel branches and their interleaving.
-
-    Example iteration order (numbers show the "true" order in a changelog):
-
-      o  4
-      |
-      o  1
-      |
-      | o  3
-      | |
-      | o  2
-      |/
-      o  0
-
-    Note that the ancestors of merges are understood by the current
-    algorithm to be on the same branch. This means no reordering will
-    occur behind a merge.
-    """
-
-    ### Quick summary of the algorithm
-    #
-    # This function is based around a "retention" principle. We keep revisions
-    # in memory until we are ready to emit a whole branch that immediately
-    # "merges" into an existing one. This reduces the number of parallel
-    # branches with interleaved revisions.
-    #
-    # During iteration revs are split into two groups:
-    # A) revision already emitted
-    # B) revision in "retention". They are stored as different subgroups.
-    #
-    # for each REV, we do the following logic:
-    #
-    #   1) if REV is a parent of (A), we will emit it. If there is a
-    #   retention group ((B) above) that is blocked on REV being
-    #   available, we emit all the revisions out of that retention
-    #   group first.
-    #
-    #   2) else, we'll search for a subgroup in (B) awaiting for REV to be
-    #   available, if such subgroup exist, we add REV to it and the subgroup is
-    #   now awaiting for REV.parents() to be available.
-    #
-    #   3) finally if no such group existed in (B), we create a new subgroup.
-    #
-    #
-    # To bootstrap the algorithm, we emit the tipmost revision (which
-    # puts it in group (A) from above).
-
-    revs.sort(reverse=True)
-
-    # Set of parents of revision that have been emitted. They can be considered
-    # unblocked as the graph generator is already aware of them so there is no
-    # need to delay the revisions that reference them.
-    #
-    # If someone wants to prioritize a branch over the others, pre-filling this
-    # set will force all other branches to wait until this branch is ready to be
-    # emitted.
-    unblocked = set(firstbranch)
-
-    # list of groups waiting to be displayed, each group is defined by:
-    #
-    #   (revs:    lists of revs waiting to be displayed,
-    #    blocked: set of that cannot be displayed before those in 'revs')
-    #
-    # The second value ('blocked') correspond to parents of any revision in the
-    # group ('revs') that is not itself contained in the group. The main idea
-    # of this algorithm is to delay as much as possible the emission of any
-    # revision.  This means waiting for the moment we are about to display
-    # these parents to display the revs in a group.
-    #
-    # This first implementation is smart until it encounters a merge: it will
-    # emit revs as soon as any parent is about to be emitted and can grow an
-    # arbitrary number of revs in 'blocked'. In practice this mean we properly
-    # retains new branches but gives up on any special ordering for ancestors
-    # of merges. The implementation can be improved to handle this better.
-    #
-    # The first subgroup is special. It corresponds to all the revision that
-    # were already emitted. The 'revs' lists is expected to be empty and the
-    # 'blocked' set contains the parents revisions of already emitted revision.
-    #
-    # You could pre-seed the <parents> set of groups[0] to a specific
-    # changesets to select what the first emitted branch should be.
-    groups = [([], unblocked)]
-    pendingheap = []
-    pendingset = set()
-
-    heapq.heapify(pendingheap)
-    heappop = heapq.heappop
-    heappush = heapq.heappush
-    for currentrev in revs:
-        # Heap works with smallest element, we want highest so we invert
-        if currentrev not in pendingset:
-            heappush(pendingheap, -currentrev)
-            pendingset.add(currentrev)
-        # iterates on pending rev until after the current rev have been
-        # processed.
-        rev = None
-        while rev != currentrev:
-            rev = -heappop(pendingheap)
-            pendingset.remove(rev)
-
-            # Seek for a subgroup blocked, waiting for the current revision.
-            matching = [i for i, g in enumerate(groups) if rev in g[1]]
-
-            if matching:
-                # The main idea is to gather together all sets that are blocked
-                # on the same revision.
-                #
-                # Groups are merged when a common blocking ancestor is
-                # observed. For example, given two groups:
-                #
-                # revs [5, 4] waiting for 1
-                # revs [3, 2] waiting for 1
-                #
-                # These two groups will be merged when we process
-                # 1. In theory, we could have merged the groups when
-                # we added 2 to the group it is now in (we could have
-                # noticed the groups were both blocked on 1 then), but
-                # the way it works now makes the algorithm simpler.
-                #
-                # We also always keep the oldest subgroup first. We can
-                # probably improve the behavior by having the longest set
-                # first. That way, graph algorithms could minimise the length
-                # of parallel lines their drawing. This is currently not done.
-                targetidx = matching.pop(0)
-                trevs, tparents = groups[targetidx]
-                for i in matching:
-                    gr = groups[i]
-                    trevs.extend(gr[0])
-                    tparents |= gr[1]
-                # delete all merged subgroups (except the one we kept)
-                # (starting from the last subgroup for performance and
-                # sanity reasons)
-                for i in reversed(matching):
-                    del groups[i]
-            else:
-                # This is a new head. We create a new subgroup for it.
-                targetidx = len(groups)
-                groups.append(([], {rev}))
-
-            gr = groups[targetidx]
-
-            # We now add the current nodes to this subgroups. This is done
-            # after the subgroup merging because all elements from a subgroup
-            # that relied on this rev must precede it.
-            #
-            # we also update the <parents> set to include the parents of the
-            # new nodes.
-            if rev == currentrev: # only display stuff in rev
-                gr[0].append(rev)
-            gr[1].remove(rev)
-            parents = [p for p in parentsfunc(rev) if p > node.nullrev]
-            gr[1].update(parents)
-            for p in parents:
-                if p not in pendingset:
-                    pendingset.add(p)
-                    heappush(pendingheap, -p)
-
-            # Look for a subgroup to display
-            #
-            # When unblocked is empty (if clause), we were not waiting for any
-            # revisions during the first iteration (if no priority was given) or
-            # if we emitted a whole disconnected set of the graph (reached a
-            # root).  In that case we arbitrarily take the oldest known
-            # subgroup. The heuristic could probably be better.
-            #
-            # Otherwise (elif clause) if the subgroup is blocked on
-            # a revision we just emitted, we can safely emit it as
-            # well.
-            if not unblocked:
-                if len(groups) > 1:  # display other subset
-                    targetidx = 1
-                    gr = groups[1]
-            elif not gr[1] & unblocked:
-                gr = None
-
-            if gr is not None:
-                # update the set of awaited revisions with the one from the
-                # subgroup
-                unblocked |= gr[1]
-                # output all revisions in the subgroup
-                for r in gr[0]:
-                    yield r
-                # delete the subgroup that you just output
-                # unless it is groups[0] in which case you just empty it.
-                if targetidx:
-                    del groups[targetidx]
-                else:
-                    gr[0][:] = []
-    # Check if we have some subgroup waiting for revisions we are not going to
-    # iterate over
-    for g in groups:
-        for r in g[0]:
-            yield r
-
 @predicate('subrepo([pattern])')
 def subrepo(repo, subset, x):
     """Changesets that add, modify or remove the given subrepo.  If no subrepo