Submitter | Pierre-Yves David |
---|---|
Date | May 15, 2015, 8:29 a.m. |
Message ID | <c1b2704abcd0dd81c7f2.1431678565@marginatus.alto.octopoid.net> |
Download | mbox | patch |
Permalink | /patch/9087/ |
State | Accepted |
Headers | show |
Comments
On Fri, 15 May 2015 01:29:25 -0700, Pierre-Yves David wrote: > # HG changeset patch > # User Pierre-Yves David <pierre-yves.david@fb.com> > # Date 1431674743 25200 > # Fri May 15 00:25:43 2015 -0700 > # Node ID c1b2704abcd0dd81c7f29d1fc21e017665e0941c > # Parent d1bd0fd07ee6adf4ab3be2b0a0a7c0df54d55abf > revset: fix iteration over ordered addset composed of non-ordered operant > > Before this change, doing ordered iteration over an 'addset' object composed of > operant without fastasc or fastdesc method could result into duplicated entry. > This was the result of applying '_iterordered' on non ordered set. > > We fix it by ensuring we iterate over the set in a sorted order. Using the fast > iterator when it exists on any operand. We kill the '_iterator' method in the > process because it did not made a lot of sense independently. [...] > + # try to use our own fast iterator if it exists > self._trysetasclist() > if self._ascending: > it = self.fastasc > else: > it = self.fastdesc > - if it is None: > - # consume the gen and try again > - self._list > - return iter(self) > - return it() > + if it is not None: > + return it() > + # maybe half of the component supports fast > + attr = 'fastdesc' > + if self._ascending: > + attr = 'fastasc' > + # get iterator for _r1 > + iter1 = getattr(self._r1, attr) > + if iter1 is None: > + # let's avoid side effect (not sure it matters) > + iter1 = iter(sorted(self._r1, reverse=not self._ascending)) > + else: > + iter1 = iter1() > + # get iterator for _r2 > + iter2 = getattr(self._r2, attr) > + if iter2 is None: > + # let's avoid side effect (not sure it matters) > + iter2 = iter(sorted(self._r2, reverse=not self._ascending)) > + else: > + iter2 = iter2() > + return self._iterordered(self._ascending, iter1, iter2) LGTM, thanks for fixing it. Can you pick "[PATCH 4 of 6] revset: drop redundant filteredset from right-hand side set of "or" operation" from my previous series? I think it's okay. I'll rewrite the other patches. Regards,
On Fri, May 15, 2015 at 01:29:25AM -0700, Pierre-Yves David wrote: > # HG changeset patch > # User Pierre-Yves David <pierre-yves.david@fb.com> > # Date 1431674743 25200 > # Fri May 15 00:25:43 2015 -0700 > # Node ID c1b2704abcd0dd81c7f29d1fc21e017665e0941c > # Parent d1bd0fd07ee6adf4ab3be2b0a0a7c0df54d55abf > revset: fix iteration over ordered addset composed of non-ordered operant Queued this with some commit message copyediting. > > Before this change, doing ordered iteration over an 'addset' object composed of > operant without fastasc or fastdesc method could result into duplicated entry. > This was the result of applying '_iterordered' on non ordered set. > > We fix it by ensuring we iterate over the set in a sorted order. Using the fast > iterator when it exists on any operand. We kill the '_iterator' method in the > process because it did not made a lot of sense independently. > > Thanks goes to Yuja Nishihara for reporting the issue and analysing the cause. > > diff --git a/mercurial/revset.py b/mercurial/revset.py > --- a/mercurial/revset.py > +++ b/mercurial/revset.py > @@ -2972,38 +2972,38 @@ class addset(abstractsmartset): > iterate ascending: > >>> rs = addset(xs, ys, ascending=True) > >>> [x for x in rs], [x for x in rs.fastasc()] # without _asclist > ([0, 2, 3, 4, 5], [0, 2, 3, 4, 5]) > >>> assert not rs._asclist > - >>> len(rs) # BROKEN > - 6 > - >>> [x for x in rs], [x for x in rs.fastasc()] # BROKEN with _asclist > - ([0, 2, 2, 3, 4, 5], [0, 2, 2, 3, 4, 5]) > + >>> len(rs) > + 5 > + >>> [x for x in rs], [x for x in rs.fastasc()] > + ([0, 2, 3, 4, 5], [0, 2, 3, 4, 5]) > >>> assert rs._asclist > > iterate descending: > >>> rs = addset(xs, ys, ascending=False) > >>> [x for x in rs], [x for x in rs.fastdesc()] # without _asclist > ([5, 4, 3, 2, 0], [5, 4, 3, 2, 0]) > >>> assert not rs._asclist > - >>> len(rs) # BROKEN > - 6 > - >>> [x for x in rs], [x for x in rs.fastdesc()] # BROKEN with _asclist > - ([5, 4, 3, 2, 2, 0], [5, 4, 3, 2, 2, 0]) > + >>> len(rs) > + 5 > + >>> [x for x in rs], [x for x in rs.fastdesc()] > + ([5, 4, 3, 2, 0], [5, 4, 3, 2, 0]) > >>> assert rs._asclist > > iterate ascending without fastasc: > >>> rs = addset(xs, generatorset(ys), ascending=True) > >>> assert rs.fastasc is None > - >>> [x for x in rs] # BROKEN > - [0, 2, 2, 3, 4, 5] > + >>> [x for x in rs] > + [0, 2, 3, 4, 5] > > iterate descending without fastdesc: > >>> rs = addset(generatorset(xs), ys, ascending=False) > >>> assert rs.fastdesc is None > - >>> [x for x in rs] # BROKEN > - [5, 4, 3, 2, 2, 0] > + >>> [x for x in rs] > + [5, 4, 3, 2, 0] > """ > def __init__(self, revs1, revs2, ascending=None): > self._r1 = revs1 > self._r2 = revs2 > self._iter = None > @@ -3018,53 +3018,61 @@ class addset(abstractsmartset): > return bool(self._r1) or bool(self._r2) > > @util.propertycache > def _list(self): > if not self._genlist: > - self._genlist = baseset(self._iterator()) > + self._genlist = baseset(iter(self)) > return self._genlist > > - def _iterator(self): > + def __iter__(self): > """Iterate over both collections without repeating elements > > If the ascending attribute is not set, iterate over the first one and > then over the second one checking for membership on the first one so we > dont yield any duplicates. > > If the ascending attribute is set, iterate over both collections at the > same time, yielding only one value at a time in the given order. > """ > if self._ascending is None: > - def gen(): > + if self._genlist: > + return iter(self._genlist) > + def arbitraryordergen(): > for r in self._r1: > yield r > inr1 = self._r1.__contains__ > for r in self._r2: > if not inr1(r): > yield r > - gen = gen() > - else: > - iter1 = iter(self._r1) > - iter2 = iter(self._r2) > - gen = self._iterordered(self._ascending, iter1, iter2) > - return gen > - > - def __iter__(self): > - if self._ascending is None: > - if self._genlist: > - return iter(self._genlist) > - return iter(self._iterator()) > + return arbitraryordergen() > + # try to use our own fast iterator if it exists > self._trysetasclist() > if self._ascending: > it = self.fastasc > else: > it = self.fastdesc > - if it is None: > - # consume the gen and try again > - self._list > - return iter(self) > - return it() > + if it is not None: > + return it() > + # maybe half of the component supports fast > + attr = 'fastdesc' > + if self._ascending: > + attr = 'fastasc' > + # get iterator for _r1 > + iter1 = getattr(self._r1, attr) > + if iter1 is None: > + # let's avoid side effect (not sure it matters) > + iter1 = iter(sorted(self._r1, reverse=not self._ascending)) > + else: > + iter1 = iter1() > + # get iterator for _r2 > + iter2 = getattr(self._r2, attr) > + if iter2 is None: > + # let's avoid side effect (not sure it matters) > + iter2 = iter(sorted(self._r2, reverse=not self._ascending)) > + else: > + iter2 = iter2() > + return self._iterordered(self._ascending, iter1, iter2) > > def _trysetasclist(self): > """populate the _asclist attribute if possible and necessary""" > if self._genlist is not None and self._asclist is None: > self._asclist = sorted(self._genlist) > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@selenic.com > http://selenic.com/mailman/listinfo/mercurial-devel
Patch
diff --git a/mercurial/revset.py b/mercurial/revset.py --- a/mercurial/revset.py +++ b/mercurial/revset.py @@ -2972,38 +2972,38 @@ class addset(abstractsmartset): iterate ascending: >>> rs = addset(xs, ys, ascending=True) >>> [x for x in rs], [x for x in rs.fastasc()] # without _asclist ([0, 2, 3, 4, 5], [0, 2, 3, 4, 5]) >>> assert not rs._asclist - >>> len(rs) # BROKEN - 6 - >>> [x for x in rs], [x for x in rs.fastasc()] # BROKEN with _asclist - ([0, 2, 2, 3, 4, 5], [0, 2, 2, 3, 4, 5]) + >>> len(rs) + 5 + >>> [x for x in rs], [x for x in rs.fastasc()] + ([0, 2, 3, 4, 5], [0, 2, 3, 4, 5]) >>> assert rs._asclist iterate descending: >>> rs = addset(xs, ys, ascending=False) >>> [x for x in rs], [x for x in rs.fastdesc()] # without _asclist ([5, 4, 3, 2, 0], [5, 4, 3, 2, 0]) >>> assert not rs._asclist - >>> len(rs) # BROKEN - 6 - >>> [x for x in rs], [x for x in rs.fastdesc()] # BROKEN with _asclist - ([5, 4, 3, 2, 2, 0], [5, 4, 3, 2, 2, 0]) + >>> len(rs) + 5 + >>> [x for x in rs], [x for x in rs.fastdesc()] + ([5, 4, 3, 2, 0], [5, 4, 3, 2, 0]) >>> assert rs._asclist iterate ascending without fastasc: >>> rs = addset(xs, generatorset(ys), ascending=True) >>> assert rs.fastasc is None - >>> [x for x in rs] # BROKEN - [0, 2, 2, 3, 4, 5] + >>> [x for x in rs] + [0, 2, 3, 4, 5] iterate descending without fastdesc: >>> rs = addset(generatorset(xs), ys, ascending=False) >>> assert rs.fastdesc is None - >>> [x for x in rs] # BROKEN - [5, 4, 3, 2, 2, 0] + >>> [x for x in rs] + [5, 4, 3, 2, 0] """ def __init__(self, revs1, revs2, ascending=None): self._r1 = revs1 self._r2 = revs2 self._iter = None @@ -3018,53 +3018,61 @@ class addset(abstractsmartset): return bool(self._r1) or bool(self._r2) @util.propertycache def _list(self): if not self._genlist: - self._genlist = baseset(self._iterator()) + self._genlist = baseset(iter(self)) return self._genlist - def _iterator(self): + def __iter__(self): """Iterate over both collections without repeating elements If the ascending attribute is not set, iterate over the first one and then over the second one checking for membership on the first one so we dont yield any duplicates. If the ascending attribute is set, iterate over both collections at the same time, yielding only one value at a time in the given order. """ if self._ascending is None: - def gen(): + if self._genlist: + return iter(self._genlist) + def arbitraryordergen(): for r in self._r1: yield r inr1 = self._r1.__contains__ for r in self._r2: if not inr1(r): yield r - gen = gen() - else: - iter1 = iter(self._r1) - iter2 = iter(self._r2) - gen = self._iterordered(self._ascending, iter1, iter2) - return gen - - def __iter__(self): - if self._ascending is None: - if self._genlist: - return iter(self._genlist) - return iter(self._iterator()) + return arbitraryordergen() + # try to use our own fast iterator if it exists self._trysetasclist() if self._ascending: it = self.fastasc else: it = self.fastdesc - if it is None: - # consume the gen and try again - self._list - return iter(self) - return it() + if it is not None: + return it() + # maybe half of the component supports fast + attr = 'fastdesc' + if self._ascending: + attr = 'fastasc' + # get iterator for _r1 + iter1 = getattr(self._r1, attr) + if iter1 is None: + # let's avoid side effect (not sure it matters) + iter1 = iter(sorted(self._r1, reverse=not self._ascending)) + else: + iter1 = iter1() + # get iterator for _r2 + iter2 = getattr(self._r2, attr) + if iter2 is None: + # let's avoid side effect (not sure it matters) + iter2 = iter(sorted(self._r2, reverse=not self._ascending)) + else: + iter2 = iter2() + return self._iterordered(self._ascending, iter1, iter2) def _trysetasclist(self): """populate the _asclist attribute if possible and necessary""" if self._genlist is not None and self._asclist is None: self._asclist = sorted(self._genlist)