Patchwork [7,of,7] context: floor adjustlinkrev graph walk during copy tracing

login
register
mail settings
Submitter Boris Feld
Date Sept. 7, 2018, 3:04 p.m.
Message ID <0f720a4aa166d08daaa3.1536332651@localhost.localdomain>
Download mbox | patch
Permalink /patch/34398/
State Accepted
Headers show

Comments

Boris Feld - Sept. 7, 2018, 3:04 p.m.
# HG changeset patch
# User Boris Feld <boris.feld@octobus.net>
# Date 1536255188 14400
#      Thu Sep 06 13:33:08 2018 -0400
# Node ID 0f720a4aa166d08daaa39e8462f2291f77e825c1
# Parent  441c39342d63c75ee101587b2fbf3af60800762f
# EXP-Topic copy-perf
# Available At https://bitbucket.org/octobus/mercurial-devel/
#              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 0f720a4aa166
context: floor adjustlinkrev graph walk during copy tracing

The `_adjustlinkrev` method gains an optional "floor" argument. The linkrev
adjustment will give up once this floor is reached. The relevant functions
using `_adjustlinkrev` are updated to pass an appropriate value in the copy
tracing code.

In some private repository, about 10% of the status call triggered
pathological case addressed by this change. The speedup varies from one call
to another, the best-observed win is moving from 170s to 11s.
via Mercurial-devel - Sept. 10, 2018, 5:36 p.m.
On Fri, Sep 7, 2018 at 8:14 AM Boris Feld <boris.feld@octobus.net> wrote:

> # HG changeset patch
> # User Boris Feld <boris.feld@octobus.net>
> # Date 1536255188 14400
> #      Thu Sep 06 13:33:08 2018 -0400
> # Node ID 0f720a4aa166d08daaa39e8462f2291f77e825c1
> # Parent  441c39342d63c75ee101587b2fbf3af60800762f
> # EXP-Topic copy-perf
> # Available At https://bitbucket.org/octobus/mercurial-devel/
> #              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r
> 0f720a4aa166
> context: floor adjustlinkrev graph walk during copy tracing
>
> The `_adjustlinkrev` method gains an optional "floor" argument. The linkrev
> adjustment will give up once this floor is reached. The relevant functions
> using `_adjustlinkrev` are updated to pass an appropriate value in the copy
> tracing code.
>
> In some private repository, about 10% of the status call triggered
> pathological case addressed by this change. The speedup varies from one
> call
> to another, the best-observed win is moving from 170s to 11s.
>
> diff --git a/mercurial/context.py b/mercurial/context.py
> --- a/mercurial/context.py
> +++ b/mercurial/context.py
> @@ -633,7 +633,7 @@ class basefilectx(object):
>      def _changeid(self):
>          return self._findchangeid()
>
> -    def _findchangeid(self):
> +    def _findchangeid(self, floor=None):
>

Could we call it "stoprev" like we do in revlog.ancestors() and a few other
places (assuming it's conceptually the same thing)?

         if r'_changeid' in self.__dict__:
>              changeid = self._changeid
>          elif r'_changectx' in self.__dict__:
> @@ -641,10 +641,11 @@ class basefilectx(object):
>          elif r'_descendantrev' in self.__dict__:
>              # this file context was created from a revision with a known
>              # descendant, we can (lazily) correct for linkrev aliases
> -            changeid = self._adjustlinkrev(self._descendantrev)
> +            changeid = self._adjustlinkrev(self._descendantrev,
> floor=floor)
>          else:
>              changeid = self._filelog.linkrev(self._filerev)
> -        self._changeid = changeid
> +        if changeid is not None:
> +            self._changeid = changeid
>          return changeid
>
>      @propertycache
> @@ -788,7 +789,7 @@ class basefilectx(object):
>
>          return True
>
> -    def _adjustlinkrev(self, srcrev, inclusive=False):
> +    def _adjustlinkrev(self, srcrev, inclusive=False, floor=None):
>          """return the first ancestor of <srcrev> introducing <fnode>
>
>          If the linkrev of the file revision does not point to an ancestor
> of
> @@ -797,6 +798,10 @@ class basefilectx(object):
>
>          :srcrev: the changeset revision we search ancestors from
>          :inclusive: if true, the src revision will also be checked
> +        :floor: an optional revision to stop the walk at. If no
> introduction
> +                of this file content could be found before this floor
> +                revision, the function will returns "None" and stops its
> +                iteration.
>          """
>          repo = self._repo
>          cl = repo.unfiltered().changelog
> @@ -822,6 +827,8 @@ class basefilectx(object):
>              fnode = self._filenode
>              path = self._path
>              for a in iteranc:
> +                if floor is not None and a < floor:
> +                    return None
>                  ac = cl.read(a) # get changeset data (we avoid object
> creation)
>                  if path in ac[3]: # checking the 'files' field.
>                      # The file has been touched, check if the content is
> @@ -837,8 +844,12 @@ class basefilectx(object):
>      def isintroducedafter(self, changelogrev):
>          """True if a filectx have been introduced after a given floor
> revision
>          """
> -        return (changelogrev <= self.linkrev()
> -                or changelogrev <= self._introrev())
> +        if changelogrev <= self.linkrev():
> +            return True
> +        introrev = self._introrev(floor=changelogrev)
> +        if introrev is None:
> +            return False
> +        return changelogrev <= introrev
>
>      def _lazyrev(self):
>          """return self.rev() if it is available without computation,
> @@ -865,19 +876,28 @@ class basefilectx(object):
>          revision is one of its ancestors. This prevents bugs from
>          'linkrev-shadowing' when a file revision is used by multiple
>          changesets.
> +
>

Seems accidental and should be dropped?


>          """
>          return self._introrev()
>
> -    def _introrev(self):
> +    def _introrev(self, floor=None):
> +        """
> +        Same as `introrev` but, with an extra argument to limit changelog
> +        iteration range in some internal usecase.
> +
> +        If `floor` is set, the `introrev` will not be searched past that
> +        `floor` revision and "None" might be returned. This is useful to
> limit
> +        iteration range.
> +        """
>          lkr = self.linkrev()
>          lazyrev = self._lazyrev()
>          if lazyrev is not None:
>              if lazyrev == lkr:
>                  return lazyrev
>              else:
> -                return self._adjustlinkrev(lazyrev, inclusive=True)
> +                return self._adjustlinkrev(lazyrev, inclusive=True,
> floor=floor)
>          else:
> -            return self._findchangeid()
> +            return self._findchangeid(floor=floor)
>
>      def introfilectx(self):
>          """Return filectx having identical contents, but pointing to the
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
>

Patch

diff --git a/mercurial/context.py b/mercurial/context.py
--- a/mercurial/context.py
+++ b/mercurial/context.py
@@ -633,7 +633,7 @@  class basefilectx(object):
     def _changeid(self):
         return self._findchangeid()
 
-    def _findchangeid(self):
+    def _findchangeid(self, floor=None):
         if r'_changeid' in self.__dict__:
             changeid = self._changeid
         elif r'_changectx' in self.__dict__:
@@ -641,10 +641,11 @@  class basefilectx(object):
         elif r'_descendantrev' in self.__dict__:
             # this file context was created from a revision with a known
             # descendant, we can (lazily) correct for linkrev aliases
-            changeid = self._adjustlinkrev(self._descendantrev)
+            changeid = self._adjustlinkrev(self._descendantrev, floor=floor)
         else:
             changeid = self._filelog.linkrev(self._filerev)
-        self._changeid = changeid
+        if changeid is not None:
+            self._changeid = changeid
         return changeid
 
     @propertycache
@@ -788,7 +789,7 @@  class basefilectx(object):
 
         return True
 
-    def _adjustlinkrev(self, srcrev, inclusive=False):
+    def _adjustlinkrev(self, srcrev, inclusive=False, floor=None):
         """return the first ancestor of <srcrev> introducing <fnode>
 
         If the linkrev of the file revision does not point to an ancestor of
@@ -797,6 +798,10 @@  class basefilectx(object):
 
         :srcrev: the changeset revision we search ancestors from
         :inclusive: if true, the src revision will also be checked
+        :floor: an optional revision to stop the walk at. If no introduction
+                of this file content could be found before this floor
+                revision, the function will returns "None" and stops its
+                iteration.
         """
         repo = self._repo
         cl = repo.unfiltered().changelog
@@ -822,6 +827,8 @@  class basefilectx(object):
             fnode = self._filenode
             path = self._path
             for a in iteranc:
+                if floor is not None and a < floor:
+                    return None
                 ac = cl.read(a) # get changeset data (we avoid object creation)
                 if path in ac[3]: # checking the 'files' field.
                     # The file has been touched, check if the content is
@@ -837,8 +844,12 @@  class basefilectx(object):
     def isintroducedafter(self, changelogrev):
         """True if a filectx have been introduced after a given floor revision
         """
-        return (changelogrev <= self.linkrev()
-                or changelogrev <= self._introrev())
+        if changelogrev <= self.linkrev():
+            return True
+        introrev = self._introrev(floor=changelogrev)
+        if introrev is None:
+            return False
+        return changelogrev <= introrev
 
     def _lazyrev(self):
         """return self.rev() if it is available without computation,
@@ -865,19 +876,28 @@  class basefilectx(object):
         revision is one of its ancestors. This prevents bugs from
         'linkrev-shadowing' when a file revision is used by multiple
         changesets.
+
         """
         return self._introrev()
 
-    def _introrev(self):
+    def _introrev(self, floor=None):
+        """
+        Same as `introrev` but, with an extra argument to limit changelog
+        iteration range in some internal usecase.
+
+        If `floor` is set, the `introrev` will not be searched past that
+        `floor` revision and "None" might be returned. This is useful to limit
+        iteration range.
+        """
         lkr = self.linkrev()
         lazyrev = self._lazyrev()
         if lazyrev is not None:
             if lazyrev == lkr:
                 return lazyrev
             else:
-                return self._adjustlinkrev(lazyrev, inclusive=True)
+                return self._adjustlinkrev(lazyrev, inclusive=True, floor=floor)
         else:
-            return self._findchangeid()
+            return self._findchangeid(floor=floor)
 
     def introfilectx(self):
         """Return filectx having identical contents, but pointing to the