Patchwork [3,of,4] revset: introduce unionset that can handle more than two sets in one instance

login
register
mail settings
Submitter Yuya Nishihara
Date May 19, 2015, 12:18 p.m.
Message ID <9996864f9e14a826ebe2.1432037900@mimosa>
Download mbox | patch
Permalink /patch/9168/
State Changes Requested
Delegated to: Pierre-Yves David
Headers show

Comments

Yuya Nishihara - May 19, 2015, 12:18 p.m.
# HG changeset patch
# User Yuya Nishihara <yuya@tcha.org>
# Date 1431757205 -32400
#      Sat May 16 15:20:05 2015 +0900
# Node ID 9996864f9e14a826ebe20df3a48905a662fe8eba
# Parent  990823292a267aeaecf73f0e9912781d0b40eeb1
revset: introduce unionset that can handle more than two sets in one instance

The new unionset allows us to handle chained OR operations in one instance,
which should greatly reduce the usage of Python stack. On the other hand, we
have to avoid the perf regression caused by extra list operations. This is
achieved by putting loop-unrolled __nonzero__ and __contains__ into the addset
class.

0) 1ef96a3b8b89
1) without loop unrolling (one in previous patch series)
2) with loop unrolling (this version)

revset #0: roots((0:tip)::)
0) wall 0.125534 comb 0.130000 user 0.130000 sys 0.000000 (best of 77)
1) wall 0.148035 comb 0.150000 user 0.150000 sys 0.000000 (best of 63)
2) wall 0.124583 comb 0.130000 user 0.130000 sys 0.000000 (best of 76)

Patch

diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -3003,18 +3003,19 @@  def _iterordered(ascending, iters):
         iter2 = _iterordered(ascending, iters[p:])
         return _iterorderedpair(ascending, iter1, iter2)
 
-class addset(abstractsmartset):
-    """Represent the addition of two sets
-
-    Wrapper structure for lazily adding two structures without losing much
+class unionset(abstractsmartset):
+    """Represent the union of sets
+
+    Wrapper structure for lazily adding structures without losing much
     performance on the __contains__ method
 
-    If the ascending attribute is set, that means the two structures are
+    If the ascending attribute is set, that means the structures are
     ordered in either an ascending or descending way. Therefore, we can add
-    them maintaining the order by iterating over both at the same time
+    them maintaining the order by iterating over them at the same time
 
     >>> xs = baseset([0, 3, 2])
     >>> ys = baseset([5, 2, 4])
+    >>> zs = baseset([5, 0, -1])
 
     >>> rs = addset(xs, ys)
     >>> bool(rs), 0 in rs, 1 in rs, 5 in rs, rs.first(), rs.last()
@@ -3025,27 +3026,30 @@  class addset(abstractsmartset):
     >>> rs = addset(baseset([]), baseset([]))
     >>> bool(rs), 0 in rs, rs.first(), rs.last()
     (False, False, None, None)
+    >>> rs = unionset([xs, ys, zs])
+    >>> bool(rs), -1 in rs, rs.first(), rs.last()
+    (True, True, 0, -1)
 
     iterate unsorted:
-    >>> rs = addset(xs, ys)
+    >>> rs = unionset([xs, ys, zs])
     >>> [x for x in rs]  # without _genlist
-    [0, 3, 2, 5, 4]
+    [0, 3, 2, 5, 4, -1]
     >>> assert not rs._genlist
     >>> len(rs)
-    5
+    6
     >>> [x for x in rs]  # with _genlist
-    [0, 3, 2, 5, 4]
+    [0, 3, 2, 5, 4, -1]
     >>> assert rs._genlist
 
     iterate ascending:
-    >>> rs = addset(xs, ys, ascending=True)
+    >>> rs = unionset([xs, ys, zs], ascending=True)
     >>> [x for x in rs], [x for x in rs.fastasc()]  # without _asclist
-    ([0, 2, 3, 4, 5], [0, 2, 3, 4, 5])
+    ([-1, 0, 2, 3, 4, 5], [-1, 0, 2, 3, 4, 5])
     >>> assert not rs._asclist
     >>> len(rs)
-    5
-    >>> [x for x in rs], [x for x in rs.fastasc()]
-    ([0, 2, 3, 4, 5], [0, 2, 3, 4, 5])
+    6
+    >>> [x for x in rs], [x for x in rs.fastasc()]  # with _asclist
+    ([-1, 0, 2, 3, 4, 5], [-1, 0, 2, 3, 4, 5])
     >>> assert rs._asclist
 
     iterate descending:
@@ -3055,7 +3059,7 @@  class addset(abstractsmartset):
     >>> assert not rs._asclist
     >>> len(rs)
     5
-    >>> [x for x in rs], [x for x in rs.fastdesc()]
+    >>> [x for x in rs], [x for x in rs.fastdesc()]  # with _asclist
     ([5, 4, 3, 2, 0], [5, 4, 3, 2, 0])
     >>> assert rs._asclist
 
@@ -3066,15 +3070,13 @@  class addset(abstractsmartset):
     [0, 2, 3, 4, 5]
 
     iterate descending without fastdesc:
-    >>> rs = addset(generatorset(xs), ys, ascending=False)
+    >>> rs = unionset([generatorset(xs), ys, zs], ascending=False)
     >>> assert rs.fastdesc is None
     >>> [x for x in rs]
-    [5, 4, 3, 2, 0]
+    [5, 4, 3, 2, 0, -1]
     """
-    def __init__(self, revs1, revs2, ascending=None):
-        self._r1 = revs1
-        self._r2 = revs2
-        self._subsets = [revs1, revs2]
+    def __init__(self, subsets, ascending=None):
+        self._subsets = subsets
         self._iter = None
         self._ascending = ascending
         self._genlist = None
@@ -3084,7 +3086,7 @@  class addset(abstractsmartset):
         return len(self._list)
 
     def __nonzero__(self):
-        return bool(self._r1) or bool(self._r2)
+        return any(bool(rs) for rs in self._subsets)
 
     @util.propertycache
     def _list(self):
@@ -3164,7 +3166,7 @@  class addset(abstractsmartset):
         return lambda: _iterordered(False, [it() for it in iterfuncs])
 
     def __contains__(self, x):
-        return x in self._r1 or x in self._r2
+        return any(x in rs for rs in self._subsets)
 
     def sort(self, reverse=False):
         """Sort the added set
@@ -3199,7 +3201,21 @@  class addset(abstractsmartset):
 
     def __repr__(self):
         d = {None: '', False: '-', True: '+'}[self._ascending]
-        return '<%s%s %r, %r>' % (type(self).__name__, d, self._r1, self._r2)
+        return '<%s%s %s>' % (type(self).__name__, d,
+                              ', '.join(map(repr, self._subsets)))
+
+class addset(unionset):
+    """Represent the union of sets, optimized for two sets"""
+    def __init__(self, revs1, revs2, ascending=None):
+        super(addset, self).__init__([revs1, revs2], ascending=ascending)
+
+    def __nonzero__(self):
+        r1, r2 = self._subsets
+        return bool(r1) or bool(r2)
+
+    def __contains__(self, x):
+        r1, r2 = self._subsets
+        return x in r1 or x in r2
 
 class generatorset(abstractsmartset):
     """Wrap a generator for lazy iteration