Patchwork [09,of,12] revlogdeltas: extract _getcandidaterevs in a function

login
register
mail settings
Submitter Boris Feld
Date Aug. 18, 2018, 9:27 a.m.
Message ID <8e08a733133d84db3587.1534584444@FB-lair>
Download mbox | patch
Permalink /patch/33874/
State Accepted
Headers show

Comments

Boris Feld - Aug. 18, 2018, 9:27 a.m.
# HG changeset patch
# User Boris Feld <boris.feld@octobus.net>
# Date 1534570054 -7200
#      Sat Aug 18 07:27:34 2018 +0200
# Node ID 8e08a733133d84db3587e2d780acbb654ade7371
# Parent  23aa0ae26db3a1f912e4f32ffb1f72758d4462f0
# EXP-Topic sparse-snapshot
# Available At https://bitbucket.org/octobus/mercurial-devel/
#              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 8e08a733133d
revlogdeltas: extract _getcandidaterevs in a function

The logic barely uses the object it is attached to. This is an important
function that we will clean up in the coming changesets. Moving it at the top
level helps us with that cleanup.
Gregory Szorc - Aug. 24, 2018, 5:56 p.m.
On Sat, Aug 18, 2018 at 2:27 AM Boris Feld <boris.feld@octobus.net> wrote:

> # HG changeset patch
> # User Boris Feld <boris.feld@octobus.net>
> # Date 1534570054 -7200
> #      Sat Aug 18 07:27:34 2018 +0200
> # Node ID 8e08a733133d84db3587e2d780acbb654ade7371
> # Parent  23aa0ae26db3a1f912e4f32ffb1f72758d4462f0
> # EXP-Topic sparse-snapshot
> # Available At https://bitbucket.org/octobus/mercurial-devel/
> #              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r
> 8e08a733133d
> revlogdeltas: extract _getcandidaterevs in a function
>
> The logic barely uses the object it is attached to. This is an important
> function that we will clean up in the coming changesets. Moving it at the
> top
> level helps us with that cleanup.
>
> diff --git a/mercurial/revlogutils/deltas.py
> b/mercurial/revlogutils/deltas.py
> --- a/mercurial/revlogutils/deltas.py
> +++ b/mercurial/revlogutils/deltas.py
> @@ -568,57 +568,57 @@ def isgooddeltainfo(revlog, deltainfo, r
>
>      return True
>
> +def _candidate_groups(revlog, p1, p2, cachedelta):
>

This doesn't conform with our naming conventions.


> +    """
> +    Provides revisions that present an interest to be diffed against,
> +    grouped by level of easiness.
> +    """
> +    gdelta = revlog._generaldelta
> +    curr = len(revlog)
> +    prev = curr - 1
> +    p1r, p2r = revlog.rev(p1), revlog.rev(p2)
> +
> +    # should we try to build a delta?
> +    if prev != nullrev and revlog.storedeltachains:
> +        tested = set()
> +        # This condition is true most of the time when processing
> +        # changegroup data into a generaldelta repo. The only time it
> +        # isn't true is if this is the first revision in a delta chain
> +        # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
> +        if cachedelta and gdelta and revlog._lazydeltabase:
> +            # Assume what we received from the server is a good choice
> +            # build delta will reuse the cache
> +            yield (cachedelta[0],)
> +            tested.add(cachedelta[0])
> +
> +        if gdelta:
> +            # exclude already lazy tested base if any
> +            parents = [p for p in (p1r, p2r)
> +                       if p != nullrev and p not in tested]
> +
> +            if not revlog._deltabothparents and len(parents) == 2:
> +                parents.sort()
> +                # To minimize the chance of having to build a fulltext,
> +                # pick first whichever parent is closest to us (max rev)
> +                yield (parents[1],)
> +                # then the other one (min rev) if the first did not fit
> +                yield (parents[0],)
> +                tested.update(parents)
> +            elif len(parents) > 0:
> +                # Test all parents (1 or 2), and keep the best candidate
> +                yield parents
> +                tested.update(parents)
> +
> +        if prev not in tested:
> +            # other approach failed try against prev to hopefully save us
> a
> +            # fulltext.
> +            yield (prev,)
> +            tested.add(prev)
> +
>  class deltacomputer(object):
>      def __init__(self, revlog):
>          self.revlog = revlog
>
> -    def _getcandidaterevs(self, p1, p2, cachedelta):
> -        """
> -        Provides revisions that present an interest to be diffed against,
> -        grouped by level of easiness.
> -        """
> -        revlog = self.revlog
> -        gdelta = revlog._generaldelta
> -        curr = len(revlog)
> -        prev = curr - 1
> -        p1r, p2r = revlog.rev(p1), revlog.rev(p2)
> -
> -        # should we try to build a delta?
> -        if prev != nullrev and revlog.storedeltachains:
> -            tested = set()
> -            # This condition is true most of the time when processing
> -            # changegroup data into a generaldelta repo. The only time it
> -            # isn't true is if this is the first revision in a delta chain
> -            # or if ``format.generaldelta=true`` disabled
> ``lazydeltabase``.
> -            if cachedelta and gdelta and revlog._lazydeltabase:
> -                # Assume what we received from the server is a good choice
> -                # build delta will reuse the cache
> -                yield (cachedelta[0],)
> -                tested.add(cachedelta[0])
> -
> -            if gdelta:
> -                # exclude already lazy tested base if any
> -                parents = [p for p in (p1r, p2r)
> -                           if p != nullrev and p not in tested]
> -
> -                if not revlog._deltabothparents and len(parents) == 2:
> -                    parents.sort()
> -                    # To minimize the chance of having to build a
> fulltext,
> -                    # pick first whichever parent is closest to us (max
> rev)
> -                    yield (parents[1],)
> -                    # then the other one (min rev) if the first did not
> fit
> -                    yield (parents[0],)
> -                    tested.update(parents)
> -                elif len(parents) > 0:
> -                    # Test all parents (1 or 2), and keep the best
> candidate
> -                    yield parents
> -                    tested.update(parents)
> -
> -            if prev not in tested:
> -                # other approach failed try against prev to hopefully
> save us a
> -                # fulltext.
> -                yield (prev,)
> -                tested.add(prev)
>
>      def buildtext(self, revinfo, fh):
>          """Builds a fulltext version of a revision
> @@ -712,7 +712,7 @@ class deltacomputer(object):
>                   depending on whether it is inlined or not
>
>          Returns the first acceptable candidate revision, as ordered by
> -        _getcandidaterevs
> +        _candidate_groups
>
>          If no suitable deltabase is found, we return delta info for a full
>          snapshot.
> @@ -736,7 +736,8 @@ class deltacomputer(object):
>
>          deltainfo = None
>          deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT
> -        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
> +        candidate_groups = _candidate_groups(self.revlog, p1, p2,
> cachedelta)
>

Same.


> +        for candidaterevs in candidate_groups:
>              # filter out delta base that will never produce good delta
>              candidaterevs = [r for r in candidaterevs
>                               if self.revlog.length(r) <= deltas_limit]
>

Patch

diff --git a/mercurial/revlogutils/deltas.py b/mercurial/revlogutils/deltas.py
--- a/mercurial/revlogutils/deltas.py
+++ b/mercurial/revlogutils/deltas.py
@@ -568,57 +568,57 @@  def isgooddeltainfo(revlog, deltainfo, r
 
     return True
 
+def _candidate_groups(revlog, p1, p2, cachedelta):
+    """
+    Provides revisions that present an interest to be diffed against,
+    grouped by level of easiness.
+    """
+    gdelta = revlog._generaldelta
+    curr = len(revlog)
+    prev = curr - 1
+    p1r, p2r = revlog.rev(p1), revlog.rev(p2)
+
+    # should we try to build a delta?
+    if prev != nullrev and revlog.storedeltachains:
+        tested = set()
+        # This condition is true most of the time when processing
+        # changegroup data into a generaldelta repo. The only time it
+        # isn't true is if this is the first revision in a delta chain
+        # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
+        if cachedelta and gdelta and revlog._lazydeltabase:
+            # Assume what we received from the server is a good choice
+            # build delta will reuse the cache
+            yield (cachedelta[0],)
+            tested.add(cachedelta[0])
+
+        if gdelta:
+            # exclude already lazy tested base if any
+            parents = [p for p in (p1r, p2r)
+                       if p != nullrev and p not in tested]
+
+            if not revlog._deltabothparents and len(parents) == 2:
+                parents.sort()
+                # To minimize the chance of having to build a fulltext,
+                # pick first whichever parent is closest to us (max rev)
+                yield (parents[1],)
+                # then the other one (min rev) if the first did not fit
+                yield (parents[0],)
+                tested.update(parents)
+            elif len(parents) > 0:
+                # Test all parents (1 or 2), and keep the best candidate
+                yield parents
+                tested.update(parents)
+
+        if prev not in tested:
+            # other approach failed try against prev to hopefully save us a
+            # fulltext.
+            yield (prev,)
+            tested.add(prev)
+
 class deltacomputer(object):
     def __init__(self, revlog):
         self.revlog = revlog
 
-    def _getcandidaterevs(self, p1, p2, cachedelta):
-        """
-        Provides revisions that present an interest to be diffed against,
-        grouped by level of easiness.
-        """
-        revlog = self.revlog
-        gdelta = revlog._generaldelta
-        curr = len(revlog)
-        prev = curr - 1
-        p1r, p2r = revlog.rev(p1), revlog.rev(p2)
-
-        # should we try to build a delta?
-        if prev != nullrev and revlog.storedeltachains:
-            tested = set()
-            # This condition is true most of the time when processing
-            # changegroup data into a generaldelta repo. The only time it
-            # isn't true is if this is the first revision in a delta chain
-            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
-            if cachedelta and gdelta and revlog._lazydeltabase:
-                # Assume what we received from the server is a good choice
-                # build delta will reuse the cache
-                yield (cachedelta[0],)
-                tested.add(cachedelta[0])
-
-            if gdelta:
-                # exclude already lazy tested base if any
-                parents = [p for p in (p1r, p2r)
-                           if p != nullrev and p not in tested]
-
-                if not revlog._deltabothparents and len(parents) == 2:
-                    parents.sort()
-                    # To minimize the chance of having to build a fulltext,
-                    # pick first whichever parent is closest to us (max rev)
-                    yield (parents[1],)
-                    # then the other one (min rev) if the first did not fit
-                    yield (parents[0],)
-                    tested.update(parents)
-                elif len(parents) > 0:
-                    # Test all parents (1 or 2), and keep the best candidate
-                    yield parents
-                    tested.update(parents)
-
-            if prev not in tested:
-                # other approach failed try against prev to hopefully save us a
-                # fulltext.
-                yield (prev,)
-                tested.add(prev)
 
     def buildtext(self, revinfo, fh):
         """Builds a fulltext version of a revision
@@ -712,7 +712,7 @@  class deltacomputer(object):
                  depending on whether it is inlined or not
 
         Returns the first acceptable candidate revision, as ordered by
-        _getcandidaterevs
+        _candidate_groups
 
         If no suitable deltabase is found, we return delta info for a full
         snapshot.
@@ -736,7 +736,8 @@  class deltacomputer(object):
 
         deltainfo = None
         deltas_limit = revinfo.textlen * LIMIT_DELTA2TEXT
-        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
+        candidate_groups = _candidate_groups(self.revlog, p1, p2, cachedelta)
+        for candidaterevs in candidate_groups:
             # filter out delta base that will never produce good delta
             candidaterevs = [r for r in candidaterevs
                              if self.revlog.length(r) <= deltas_limit]