Patchwork [5,of,6] revset: make addset class accepts more than two subsets (issue4565)

login
register
mail settings
Submitter Yuya Nishihara
Date May 12, 2015, 10:59 p.m.
Message ID <d15815ca13aec24b6c62.1431471579@mimosa>
Download mbox | patch
Permalink /patch/9034/
State Accepted
Headers show

Comments

Yuya Nishihara - May 12, 2015, 10:59 p.m.
# HG changeset patch
# User Yuya Nishihara <yuya@tcha.org>
# Date 1426404911 -32400
#      Sun Mar 15 16:35:11 2015 +0900
# Node ID d15815ca13aec24b6c62d2ec2eebebea52adc411
# Parent  1ef1b51bba52a6d186fdbc794045eec6b6477be0
revset: make addset class accepts more than two subsets (issue4565)

This allows us to handle chained OR operations in one addset instance, which
should greatly reduce the usage of Python stack.
Pierre-Yves David - May 13, 2015, 12:55 a.m.
On 05/12/2015 03:59 PM, Yuya Nishihara wrote:
> # HG changeset patch
> # User Yuya Nishihara <yuya@tcha.org>
> # Date 1426404911 -32400
> #      Sun Mar 15 16:35:11 2015 +0900
> # Node ID d15815ca13aec24b6c62d2ec2eebebea52adc411
> # Parent  1ef1b51bba52a6d186fdbc794045eec6b6477be0
> revset: make addset class accepts more than two subsets (issue4565)
>
> This allows us to handle chained OR operations in one addset instance, which
> should greatly reduce the usage of Python stack.

The way most python's API usually work on this aspect is by taking an 
arbitrary number of arguments:

 >>> max(1, 2, 3, 4, 5, 6)

we could do the same here:

 >>> addset(x1, x2)
 >>> addset(x1, x2, x3)

what do you think ?
Yuya Nishihara - May 13, 2015, 2:43 p.m.
On Tue, 12 May 2015 17:55:26 -0700, Pierre-Yves David wrote:
> On 05/12/2015 03:59 PM, Yuya Nishihara wrote:
> > # HG changeset patch
> > # User Yuya Nishihara <yuya@tcha.org>
> > # Date 1426404911 -32400
> > #      Sun Mar 15 16:35:11 2015 +0900
> > # Node ID d15815ca13aec24b6c62d2ec2eebebea52adc411
> > # Parent  1ef1b51bba52a6d186fdbc794045eec6b6477be0
> > revset: make addset class accepts more than two subsets (issue4565)
> >
> > This allows us to handle chained OR operations in one addset instance, which
> > should greatly reduce the usage of Python stack.
> 
> The way most python's API usually work on this aspect is by taking an 
> arbitrary number of arguments:
> 
>  >>> max(1, 2, 3, 4, 5, 6)
> 
> we could do the same here:
> 
>  >>> addset(x1, x2)
>  >>> addset(x1, x2, x3)
> 
> what do you think ?

It could, but I'll change addset to be constructed from a list in order to
fix issue4624. So addset(*subsets) won't be so useful.

-def orset(repo, subset, x, y):
-    xl = getset(repo, subset, x)
-    yl = getset(repo, subset, y)
-    return xl + yl
+def orset(repo, subset, *xs):
+    return addset([getset(repo, subset, x) for x in xs])

Patch

diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -2732,7 +2732,7 @@  class abstractsmartset(object):
         """Returns a new object with the union of the two collections.
 
         This is part of the mandatory API for smartset."""
-        return addset(self, other)
+        return addset([self, other])
 
     def __sub__(self, other):
         """Returns a new object with the substraction of the two collections.
@@ -2936,52 +2936,56 @@  class filteredset(abstractsmartset):
         return '<%s %r>' % (type(self).__name__, self._subset)
 
 class addset(abstractsmartset):
-    """Represent the addition of two sets
-
-    Wrapper structure for lazily adding two structures without losing much
-    performance on the __contains__ method
-
-    If the ascending attribute is set, that means the two structures are
+    """Represent the addition of more than one sets
+
+    Wrapper structure for lazily adding more than one structures without losing
+    much performance on the __contains__ method
+
+    If the ascending attribute is set, that means the structures are
     ordered in either an ascending or descending way. Therefore, we can add
-    them maintaining the order by iterating over both at the same time
+    them maintaining the order by iterating over them at the same time
 
     >>> xs = baseset([0, 3, 2])
     >>> ys = baseset([5, 2, 4])
-
-    >>> rs = addset(xs, ys)
+    >>> zs = baseset([5, 0, -1])
+
+    >>> rs = addset([xs, ys])
     >>> bool(rs), 0 in rs, 1 in rs, 5 in rs, rs.first(), rs.last()
     (True, True, False, True, 0, 4)
-    >>> rs = addset(xs, baseset([]))
+    >>> rs = addset([xs, baseset([])])
     >>> bool(rs), 0 in rs, 1 in rs, rs.first(), rs.last()
     (True, True, False, 0, 2)
-    >>> rs = addset(baseset([]), baseset([]))
+    >>> rs = addset([baseset([]), baseset([])])
     >>> bool(rs), 0 in rs, rs.first(), rs.last()
     (False, False, None, None)
+    >>> rs = addset([xs, ys, zs])
+    >>> bool(rs), -1 in rs, rs.first(), rs.last()
+    (True, True, 0, -1)
 
     iterate unsorted:
-    >>> rs = addset(xs, ys)
+    >>> rs = addset([xs, ys, zs])
     >>> [x for x in rs]  # without _genlist
-    [0, 3, 2, 5, 4]
+    [0, 3, 2, 5, 4, -1]
     >>> assert not rs._genlist
     >>> len(rs)
-    5
+    6
     >>> [x for x in rs]  # with _genlist
-    [0, 3, 2, 5, 4]
+    [0, 3, 2, 5, 4, -1]
     >>> assert rs._genlist
 
     iterate ascending:
-    >>> rs = addset(xs, ys, ascending=True)
+    >>> rs = addset([xs, ys, zs], ascending=True)
     >>> [x for x in rs], [x for x in rs.fastasc()]  # without _asclist
-    ([0, 2, 3, 4, 5], [0, 2, 3, 4, 5])
+    ([-1, 0, 2, 3, 4, 5], [-1, 0, 2, 3, 4, 5])
     >>> assert not rs._asclist
     >>> len(rs)
-    5
+    6
     >>> [x for x in rs], [x for x in rs.fastasc()]  # with _asclist
-    ([0, 2, 3, 4, 5], [0, 2, 3, 4, 5])
+    ([-1, 0, 2, 3, 4, 5], [-1, 0, 2, 3, 4, 5])
     >>> assert rs._asclist
 
     iterate descending:
-    >>> rs = addset(xs, ys, ascending=False)
+    >>> rs = addset([xs, ys], ascending=False)
     >>> [x for x in rs], [x for x in rs.fastdesc()]  # without _asclist
     ([5, 4, 3, 2, 0], [5, 4, 3, 2, 0])
     >>> assert not rs._asclist
@@ -2992,20 +2996,29 @@  class addset(abstractsmartset):
     >>> assert rs._asclist
 
     iterate ascending without fastasc:
-    >>> rs = addset(xs, generatorset(ys), ascending=True)
+    >>> rs = addset([xs, generatorset(ys)], ascending=True)
     >>> assert rs.fastasc is None
     >>> [x for x in rs]
     [0, 2, 3, 4, 5]
 
     iterate descending without fastdesc:
-    >>> rs = addset(generatorset(xs), ys, ascending=False)
+    >>> rs = addset([generatorset(xs), ys, zs], ascending=False)
     >>> assert rs.fastdesc is None
     >>> [x for x in rs]
-    [5, 4, 3, 2, 0]
+    [5, 4, 3, 2, 0, -1]
+
+    empty subset:
+    >>> rs = addset([])
+    >>> bool(rs), 0 in rs, rs.first(), rs.last()
+    (False, False, None, None)
+    >>> [x for x in rs], [x for x in rs.fastasc()], [x for x in rs.fastdesc()]
+    ([], [], [])
+    >>> rs = addset([], ascending=True)
+    >>> [x for x in rs]
+    []
     """
-    def __init__(self, revs1, revs2, ascending=None):
-        self._r1 = revs1
-        self._r2 = revs2
+    def __init__(self, subsets, ascending=None):
+        self._subsets = subsets
         self._iter = None
         self._ascending = ascending
         self._genlist = None
@@ -3015,7 +3028,7 @@  class addset(abstractsmartset):
         return len(self._list)
 
     def __nonzero__(self):
-        return bool(self._r1) or bool(self._r2)
+        return util.any(bool(rs) for rs in self._subsets)
 
     @util.propertycache
     def _list(self):
@@ -3030,12 +3043,18 @@  class addset(abstractsmartset):
         and then over the second one checking for membership on the first one
         so we don't yield any duplicates.
         """
-        for r in self._r1:
+        if not self._subsets:
+            return
+        rs = self._subsets[0]
+        for r in rs:
             yield r
-        inr1 = self._r1.__contains__
-        for r in self._r2:
-            if not inr1(r):
+        seen = rs
+        for rs in self._subsets[1:]:
+            for r in rs:
+                if r in seen:
+                    continue
                 yield r
+            seen += rs
 
     def __iter__(self):
         if self._ascending is None:
@@ -3063,62 +3082,57 @@  class addset(abstractsmartset):
         self._trysetasclist()
         if self._asclist is not None:
             return self._asclist.__iter__
-        iter1 = self._r1.fastasc
-        iter2 = self._r2.fastasc
-        if None in (iter1, iter2):
+        iters = [rs.fastasc for rs in self._subsets]
+        if None in iters:
             return None
-        return lambda: self._iterordered(True, iter1(), iter2())
+        return lambda: self._iterordered(True, [it() for it in iters])
 
     @property
     def fastdesc(self):
         self._trysetasclist()
         if self._asclist is not None:
             return self._asclist.__reversed__
-        iter1 = self._r1.fastdesc
-        iter2 = self._r2.fastdesc
-        if None in (iter1, iter2):
+        iters = [rs.fastdesc for rs in self._subsets]
+        if None in iters:
             return None
-        return lambda: self._iterordered(False, iter1(), iter2())
-
-    def _iterordered(self, ascending, iter1, iter2):
-        """produce an ordered iteration from two iterators with the same order
+        return lambda: self._iterordered(False, [it() for it in iters])
+
+    def _iterordered(self, ascending, iters):
+        """produce an ordered iteration from iterators with the same order
 
         The ascending is used to indicated the iteration direction.
         """
+        if not iters:
+            return
+
         choice = max
         if ascending:
             choice = min
 
-        val1 = None
-        val2 = None
-        try:
-            # Consume both iterators in an ordered way until one is
-            # empty
-            while True:
-                if val1 is None:
-                    val1 = iter1.next()
-                if val2 is None:
-                    val2 = iter2.next()
-                next = choice(val1, val2)
+        curvals = dict((it, None) for it in iters)
+        # Consume iterators in an ordered way until everything except the last
+        # one is empty
+        while len(curvals) > 1:
+            try:
+                for it, val in curvals.iteritems():
+                    if val is None:
+                        curvals[it] = it.next()
+                next = choice(curvals.itervalues())
                 yield next
-                if val1 == next:
-                    val1 = None
-                if val2 == next:
-                    val2 = None
-        except StopIteration:
-            # Flush any remaining values and consume the other one
-            it = iter2
-            if val1 is not None:
-                yield val1
-                it = iter1
-            elif val2 is not None:
-                # might have been equality and both are empty
-                yield val2
-            for val in it:
-                yield val
+                for it, val in curvals.iteritems():
+                    if val == next:
+                        curvals[it] = None
+            except StopIteration:
+                del curvals[it]
+        # Flush the remaining value and consume the last one
+        it, val = curvals.popitem()
+        if val is not None:
+            yield val
+        for val in it:
+            yield val
 
     def __contains__(self, x):
-        return x in self._r1 or x in self._r2
+        return util.any(x in rs for rs in self._subsets)
 
     def sort(self, reverse=False):
         """Sort the added set
@@ -3153,7 +3167,8 @@  class addset(abstractsmartset):
 
     def __repr__(self):
         d = {None: '', False: '-', True: '+'}[self._ascending]
-        return '<%s%s %r, %r>' % (type(self).__name__, d, self._r1, self._r2)
+        return '<%s%s %s>' % (type(self).__name__, d,
+                              ', '.join(map(repr, self._subsets)))
 
 class generatorset(abstractsmartset):
     """Wrap a generator for lazy iteration