Patchwork [4,of,6,V2] rust: hooking into Python code

login
register
mail settings
Submitter Georges Racinet
Date Oct. 9, 2018, 3:22 p.m.
Message ID <210cd79d35d50e989a7e.1539098568@purity.tombe.racinet.fr>
Download mbox | patch
Permalink /patch/35582/
State Superseded
Headers show

Comments

Georges Racinet - Oct. 9, 2018, 3:22 p.m.
# HG changeset patch
# User Georges Racinet <gracinet@anybox.fr>
# Date 1538060144 -7200
#      Thu Sep 27 16:55:44 2018 +0200
# Node ID 210cd79d35d50e989a7eabbcebb4addde9365f9e
# Parent  cf5c799e65a1225538fa1246887e2efd94c09acc
# EXP-Topic rustancestors-rfc
rust: hooking into Python code

We introduce a new class called 'rustlazyancestors'
in the ancestors module, which is used only if
parsers.rustlazyancestors does exist.

The implementation of __contains__ stays unchanged,
but is now backed by the Rust iterator. It would
probably be a good candidate for further development,
though, as it is mostly looping, and duplicates the
'seen' set.

The Rust code could be further optimized, however it already
gives rise to performance improvements:

median timing from hg perfancestors:
- on pypy:
    before: 0.113749s
    after:  0.018628s -84%
- on mozilla central:
    before: 0.329075s
    after:  0.083889s -75%
- on a private repository (about one million revisions):
    before: 1.982365s
    after:  0.329907s -83%
- on another private repository (about 400 000 revisions):
    before: 0.826686s
    after:  0.123760s -85%

median timing for hg perfbranchmap base
- on pypy:
    before:  1.808351s
    after:   0.480814s -73%
- on mozilla central:
    before: 18.493269s
    after:   1.305514s -93%
- on a private repository (about one million revisions):
    before:  9.153785s
    after:   3.662155s -60%
- on another private repository (about 400 000 revisions):
    before: 98.034737s
    after:  18.109361s -81%
Yuya Nishihara - Oct. 12, 2018, 5:03 a.m.
On Tue, 09 Oct 2018 17:22:48 +0200, Georges Racinet wrote:
> # HG changeset patch
> # User Georges Racinet <gracinet@anybox.fr>
> # Date 1538060144 -7200
> #      Thu Sep 27 16:55:44 2018 +0200
> # Node ID 210cd79d35d50e989a7eabbcebb4addde9365f9e
> # Parent  cf5c799e65a1225538fa1246887e2efd94c09acc
> # EXP-Topic rustancestors-rfc
> rust: hooking into Python code

> +class rustlazyancestors(lazyancestors):
> +
> +    def __init__(self, index, revs, stoprev=0, inclusive=False):

It's probably better to duplicate __nonzero__() instead of inheriting
lazyancestors and not initializing the super class.

> +        self._index = index
> +        self._stoprev = stoprev
> +        self._inclusive = inclusive
> +        # no need to prefilter out init revs that are smaller than stoprev,
> +        # it's done by rustlazyancestors constructor.
> +        # we need to convert to a list, because our ruslazyancestors
> +        # constructor (from C code) doesn't understand anything else yet
> +        self._initrevs = initrevs = list(revs)
> +
> +        self._containsseen = set()
> +        self._containsiter = parsers.rustlazyancestors(
> +            index, initrevs, stoprev, inclusive)

> diff -r cf5c799e65a1 -r 210cd79d35d5 mercurial/changelog.py
> --- a/mercurial/changelog.py	Thu Sep 27 16:56:15 2018 +0200
> +++ b/mercurial/changelog.py	Thu Sep 27 16:55:44 2018 +0200

> +    def ancestors(self, revs, *args, **kwargs):
> +        if util.safehasattr(parsers, 'rustlazyancestors') and self.filteredrevs:
> +            missing = self.filteredrevs.difference(revs)
> +            if missing:
> +                # raise the lookup error
> +                self.rev(min(missing))
> +        return super(changelog, self).ancestors(revs, *args, **kwargs)

>      def reachableroots(self, minroot, heads, roots, includepath=False):
>          return self.index.reachableroots2(minroot, heads, roots, includepath)
>  
> diff -r cf5c799e65a1 -r 210cd79d35d5 mercurial/revlog.py
> --- a/mercurial/revlog.py	Thu Sep 27 16:56:15 2018 +0200
> +++ b/mercurial/revlog.py	Thu Sep 27 16:55:44 2018 +0200
> @@ -747,6 +747,10 @@
>  
>          See the documentation for ancestor.lazyancestors for more details."""
>  
> +        if util.safehasattr(parsers, 'rustlazyancestors'):
> +            return ancestor.rustlazyancestors(
> +                self.index, revs,
> +                stoprev=stoprev, inclusive=inclusive)
>          return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
>                                        inclusive=inclusive)

I think these "if WITH_RUST" things can be handled in ancestors.py.
Georges Racinet - Oct. 12, 2018, 9:39 a.m.
On 10/12/2018 07:03 AM, Yuya Nishihara wrote:
> On Tue, 09 Oct 2018 17:22:48 +0200, Georges Racinet wrote:
>> # HG changeset patch
>> # User Georges Racinet <gracinet@anybox.fr>
>> # Date 1538060144 -7200
>> #      Thu Sep 27 16:55:44 2018 +0200
>> # Node ID 210cd79d35d50e989a7eabbcebb4addde9365f9e
>> # Parent  cf5c799e65a1225538fa1246887e2efd94c09acc
>> # EXP-Topic rustancestors-rfc
>> rust: hooking into Python code
>> +class rustlazyancestors(lazyancestors):
>> +
>> +    def __init__(self, index, revs, stoprev=0, inclusive=False):
> It's probably better to duplicate __nonzero__() instead of inheriting
> lazyancestors and not initializing the super class.
Yes, I was wondering about that. Will do it in PATCH 6 v3.
>
>> +        self._index = index
>> +        self._stoprev = stoprev
>> +        self._inclusive = inclusive
>> +        # no need to prefilter out init revs that are smaller than stoprev,
>> +        # it's done by rustlazyancestors constructor.
>> +        # we need to convert to a list, because our ruslazyancestors
>> +        # constructor (from C code) doesn't understand anything else yet
>> +        self._initrevs = initrevs = list(revs)
>> +
>> +        self._containsseen = set()
>> +        self._containsiter = parsers.rustlazyancestors(
>> +            index, initrevs, stoprev, inclusive)
>> diff -r cf5c799e65a1 -r 210cd79d35d5 mercurial/changelog.py
>> --- a/mercurial/changelog.py	Thu Sep 27 16:56:15 2018 +0200
>> +++ b/mercurial/changelog.py	Thu Sep 27 16:55:44 2018 +0200
>> +    def ancestors(self, revs, *args, **kwargs):
>> +        if util.safehasattr(parsers, 'rustlazyancestors') and self.filteredrevs:
>> +            missing = self.filteredrevs.difference(revs)
>> +            if missing:
>> +                # raise the lookup error
>> +                self.rev(min(missing))
>> +        return super(changelog, self).ancestors(revs, *args, **kwargs)
>>      def reachableroots(self, minroot, heads, roots, includepath=False):
>>          return self.index.reachableroots2(minroot, heads, roots, includepath)
>>  
>> diff -r cf5c799e65a1 -r 210cd79d35d5 mercurial/revlog.py
>> --- a/mercurial/revlog.py	Thu Sep 27 16:56:15 2018 +0200
>> +++ b/mercurial/revlog.py	Thu Sep 27 16:55:44 2018 +0200
>> @@ -747,6 +747,10 @@
>>  
>>          See the documentation for ancestor.lazyancestors for more details."""
>>  
>> +        if util.safehasattr(parsers, 'rustlazyancestors'):
>> +            return ancestor.rustlazyancestors(
>> +                self.index, revs,
>> +                stoprev=stoprev, inclusive=inclusive)
>>          return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
>>                                        inclusive=inclusive)
> I think these "if WITH_RUST" things can be handled in ancestors.py.
The reason was that ancestor.lazyancestors is initialized from the
parents function, whereas the Rust bascked implementation needs the
index.  I'd be glad to push that down to ancestors.py and harmonize over
passing the index, but that's a bigger change.
Yuya Nishihara - Oct. 12, 2018, 10:19 a.m.
On Fri, 12 Oct 2018 11:39:18 +0200, Georges Racinet wrote:
> On 10/12/2018 07:03 AM, Yuya Nishihara wrote:
> > On Tue, 09 Oct 2018 17:22:48 +0200, Georges Racinet wrote:
> >> # HG changeset patch
> >> # User Georges Racinet <gracinet@anybox.fr>
> >> # Date 1538060144 -7200
> >> #      Thu Sep 27 16:55:44 2018 +0200
> >> # Node ID 210cd79d35d50e989a7eabbcebb4addde9365f9e
> >> # Parent  cf5c799e65a1225538fa1246887e2efd94c09acc
> >> # EXP-Topic rustancestors-rfc
> >> rust: hooking into Python code

> >> diff -r cf5c799e65a1 -r 210cd79d35d5 mercurial/revlog.py
> >> --- a/mercurial/revlog.py	Thu Sep 27 16:56:15 2018 +0200
> >> +++ b/mercurial/revlog.py	Thu Sep 27 16:55:44 2018 +0200
> >> @@ -747,6 +747,10 @@
> >>  
> >>          See the documentation for ancestor.lazyancestors for more details."""
> >>  
> >> +        if util.safehasattr(parsers, 'rustlazyancestors'):
> >> +            return ancestor.rustlazyancestors(
> >> +                self.index, revs,
> >> +                stoprev=stoprev, inclusive=inclusive)
> >>          return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
> >>                                        inclusive=inclusive)
> > I think these "if WITH_RUST" things can be handled in ancestors.py.
> The reason was that ancestor.lazyancestors is initialized from the
> parents function, whereas the Rust bascked implementation needs the
> index.  I'd be glad to push that down to ancestors.py and harmonize over
> passing the index, but that's a bigger change.

Ah, good point. The factory function could be moved to ancestors.py, but that
wouldn't make sense given we'll have to pass in a revlog instance to it.

Patch

diff -r cf5c799e65a1 -r 210cd79d35d5 mercurial/ancestor.py
--- a/mercurial/ancestor.py	Thu Sep 27 16:56:15 2018 +0200
+++ b/mercurial/ancestor.py	Thu Sep 27 16:55:44 2018 +0200
@@ -11,9 +11,12 @@ 
 
 from .node import nullrev
 from . import (
+    policy,
     pycompat,
 )
 
+parsers = policy.importmod(r'parsers')
+
 def commonancestorsheads(pfunc, *nodes):
     """Returns a set with the heads of all common ancestors of all nodes,
     heads(::nodes[0] and ::nodes[1] and ...) .
@@ -379,3 +382,25 @@ 
             # free up memory.
             self._containsiter = None
             return False
+
+class rustlazyancestors(lazyancestors):
+
+    def __init__(self, index, revs, stoprev=0, inclusive=False):
+        self._index = index
+        self._stoprev = stoprev
+        self._inclusive = inclusive
+        # no need to prefilter out init revs that are smaller than stoprev,
+        # it's done by rustlazyancestors constructor.
+        # we need to convert to a list, because our ruslazyancestors
+        # constructor (from C code) doesn't understand anything else yet
+        self._initrevs = initrevs = list(revs)
+
+        self._containsseen = set()
+        self._containsiter = parsers.rustlazyancestors(
+            index, initrevs, stoprev, inclusive)
+
+    def __iter__(self):
+        return parsers.rustlazyancestors(self._index,
+                                         self._initrevs,
+                                         self._stoprev,
+                                         self._inclusive)
diff -r cf5c799e65a1 -r 210cd79d35d5 mercurial/changelog.py
--- a/mercurial/changelog.py	Thu Sep 27 16:56:15 2018 +0200
+++ b/mercurial/changelog.py	Thu Sep 27 16:55:44 2018 +0200
@@ -20,14 +20,18 @@ 
 from . import (
     encoding,
     error,
+    policy,
     pycompat,
     revlog,
+    util,
 )
 from .utils import (
     dateutil,
     stringutil,
 )
 
+parsers = policy.importmod(r'parsers')
+
 _defaultextra = {'branch': 'default'}
 
 def _string_escape(text):
@@ -343,6 +347,14 @@ 
             if i not in self.filteredrevs:
                 yield i
 
+    def ancestors(self, revs, *args, **kwargs):
+        if util.safehasattr(parsers, 'rustlazyancestors') and self.filteredrevs:
+            missing = self.filteredrevs.difference(revs)
+            if missing:
+                # raise the lookup error
+                self.rev(min(missing))
+        return super(changelog, self).ancestors(revs, *args, **kwargs)
+
     def reachableroots(self, minroot, heads, roots, includepath=False):
         return self.index.reachableroots2(minroot, heads, roots, includepath)
 
diff -r cf5c799e65a1 -r 210cd79d35d5 mercurial/revlog.py
--- a/mercurial/revlog.py	Thu Sep 27 16:56:15 2018 +0200
+++ b/mercurial/revlog.py	Thu Sep 27 16:55:44 2018 +0200
@@ -747,6 +747,10 @@ 
 
         See the documentation for ancestor.lazyancestors for more details."""
 
+        if util.safehasattr(parsers, 'rustlazyancestors'):
+            return ancestor.rustlazyancestors(
+                self.index, revs,
+                stoprev=stoprev, inclusive=inclusive)
         return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                       inclusive=inclusive)