Patchwork [04,of,11,V2] revlog: move the good delta heuristic in revlogutils.deltas

login
register
mail settings
Submitter Boris Feld
Date Aug. 27, 2018, 10:06 a.m.
Message ID <7e67a3db18113ec4ae6e.1535364391@FB-lair>
Download mbox | patch
Permalink /patch/34067/
State Accepted
Headers show

Comments

Boris Feld - Aug. 27, 2018, 10:06 a.m.
# HG changeset patch
# User Boris Feld <boris.feld@octobus.net>
# Date 1534380638 -7200
#      Thu Aug 16 02:50:38 2018 +0200
# Node ID 7e67a3db18113ec4ae6efca9359bcb69ed0d6ee9
# Parent  a061be6dce561b65bf8e008333f7737466dbcdf1
# EXP-Topic sparse-snapshot
# Available At https://bitbucket.org/octobus/mercurial-devel/
#              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 7e67a3db1811
revlog: move the good delta heuristic in revlogutils.deltas

This is logic about building delta chains. We move it with related logic. The
only user of this logic moved in this module already.

Patch

diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -38,7 +38,6 @@  from .i18n import _
 from .revlogutils.constants import (
     FLAG_GENERALDELTA,
     FLAG_INLINE_DATA,
-    LIMIT_DELTA2TEXT,
     REVIDX_DEFAULT_FLAGS,
     REVIDX_ELLIPSIS,
     REVIDX_EXTSTORED,
@@ -1915,96 +1914,6 @@  class revlog(object):
 
         return compressor.decompress(data)
 
-    def _isgooddeltainfo(self, deltainfo, revinfo):
-        """Returns True if the given delta is good. Good means that it is within
-        the disk span, disk size, and chain length bounds that we know to be
-        performant."""
-        if deltainfo is None:
-            return False
-
-        # - 'deltainfo.distance' is the distance from the base revision --
-        #   bounding it limits the amount of I/O we need to do.
-        # - 'deltainfo.compresseddeltalen' is the sum of the total size of
-        #   deltas we need to apply -- bounding it limits the amount of CPU
-        #   we consume.
-
-        if self._sparserevlog:
-            # As sparse-read will be used, we can consider that the distance,
-            # instead of being the span of the whole chunk,
-            # is the span of the largest read chunk
-            base = deltainfo.base
-
-            if base != nullrev:
-                deltachain = self._deltachain(base)[0]
-            else:
-                deltachain = []
-
-            # search for the first non-snapshot revision
-            for idx, r in enumerate(deltachain):
-                if not self.issnapshot(r):
-                    break
-            deltachain = deltachain[idx:]
-            chunks = deltautil.slicechunk(self, deltachain, deltainfo)
-            all_span = [deltautil.segmentspan(self, revs, deltainfo)
-                        for revs in chunks]
-            distance = max(all_span)
-        else:
-            distance = deltainfo.distance
-
-        textlen = revinfo.textlen
-        defaultmax = textlen * 4
-        maxdist = self._maxdeltachainspan
-        if not maxdist:
-            maxdist = distance # ensure the conditional pass
-        maxdist = max(maxdist, defaultmax)
-        if self._sparserevlog and maxdist < self._srmingapsize:
-            # In multiple place, we are ignoring irrelevant data range below a
-            # certain size. Be also apply this tradeoff here and relax span
-            # constraint for small enought content.
-            maxdist = self._srmingapsize
-
-        # Bad delta from read span:
-        #
-        #   If the span of data read is larger than the maximum allowed.
-        if maxdist < distance:
-            return False
-
-        # Bad delta from new delta size:
-        #
-        #   If the delta size is larger than the target text, storing the
-        #   delta will be inefficient.
-        if textlen < deltainfo.deltalen:
-            return False
-
-        # Bad delta from cumulated payload size:
-        #
-        #   If the sum of delta get larger than K * target text length.
-        if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
-            return False
-
-        # Bad delta from chain length:
-        #
-        #   If the number of delta in the chain gets too high.
-        if self._maxchainlen and  self._maxchainlen < deltainfo.chainlen:
-            return False
-
-        # bad delta from intermediate snapshot size limit
-        #
-        #   If an intermediate snapshot size is higher than the limit.  The
-        #   limit exist to prevent endless chain of intermediate delta to be
-        #   created.
-        if (deltainfo.snapshotdepth is not None and
-                (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
-            return False
-
-        # bad delta if new intermediate snapshot is larger than the previous
-        # snapshot
-        if (deltainfo.snapshotdepth
-                and self.length(deltainfo.base) < deltainfo.deltalen):
-            return False
-
-        return True
-
     def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                      cachedelta, ifh, dfh, alwayscache=False,
                      deltacomputer=None):
diff --git a/mercurial/revlogutils/constants.py b/mercurial/revlogutils/constants.py
--- a/mercurial/revlogutils/constants.py
+++ b/mercurial/revlogutils/constants.py
@@ -41,6 +41,3 @@  REVIDX_FLAGS_ORDER = [
 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
 # bitmark for flags that could cause rawdata content change
 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
-
-# maximum <delta-chain-data>/<revision-text-length> ratio
-LIMIT_DELTA2TEXT = 2
diff --git a/mercurial/revlogutils/deltas.py b/mercurial/revlogutils/deltas.py
--- a/mercurial/revlogutils/deltas.py
+++ b/mercurial/revlogutils/deltas.py
@@ -19,7 +19,6 @@  from ..node import (
 from ..i18n import _
 
 from .constants import (
-    LIMIT_DELTA2TEXT,
     REVIDX_ISCENSORED,
     REVIDX_RAWTEXT_CHANGING_FLAGS,
 )
@@ -36,6 +35,9 @@  from .. import (
 RevlogError = error.RevlogError
 CensoredNodeError = error.CensoredNodeError
 
+# maximum <delta-chain-data>/<revision-text-length> ratio
+LIMIT_DELTA2TEXT = 2
+
 class _testrevlog(object):
     """minimalist fake revlog to use in doctests"""
 
@@ -447,6 +449,97 @@  class _deltainfo(object):
     compresseddeltalen = attr.ib()
     snapshotdepth = attr.ib()
 
+def isgooddeltainfo(revlog, deltainfo, revinfo):
+    """Returns True if the given delta is good. Good means that it is within
+    the disk span, disk size, and chain length bounds that we know to be
+    performant."""
+    if deltainfo is None:
+        return False
+
+    # - 'deltainfo.distance' is the distance from the base revision --
+    #   bounding it limits the amount of I/O we need to do.
+    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
+    #   deltas we need to apply -- bounding it limits the amount of CPU
+    #   we consume.
+
+    if revlog._sparserevlog:
+        # As sparse-read will be used, we can consider that the distance,
+        # instead of being the span of the whole chunk,
+        # is the span of the largest read chunk
+        base = deltainfo.base
+
+        if base != nullrev:
+            deltachain = revlog._deltachain(base)[0]
+        else:
+            deltachain = []
+
+        # search for the first non-snapshot revision
+        for idx, r in enumerate(deltachain):
+            if not revlog.issnapshot(r):
+                break
+        deltachain = deltachain[idx:]
+        chunks = slicechunk(revlog, deltachain, deltainfo)
+        all_span = [segmentspan(revlog, revs, deltainfo)
+                    for revs in chunks]
+        distance = max(all_span)
+    else:
+        distance = deltainfo.distance
+
+    textlen = revinfo.textlen
+    defaultmax = textlen * 4
+    maxdist = revlog._maxdeltachainspan
+    if not maxdist:
+        maxdist = distance # ensure the conditional pass
+    maxdist = max(maxdist, defaultmax)
+    if revlog._sparserevlog and maxdist < revlog._srmingapsize:
+        # In multiple place, we are ignoring irrelevant data range below a
+        # certain size. Be also apply this tradeoff here and relax span
+        # constraint for small enought content.
+        maxdist = revlog._srmingapsize
+
+    # Bad delta from read span:
+    #
+    #   If the span of data read is larger than the maximum allowed.
+    if maxdist < distance:
+        return False
+
+    # Bad delta from new delta size:
+    #
+    #   If the delta size is larger than the target text, storing the
+    #   delta will be inefficient.
+    if textlen < deltainfo.deltalen:
+        return False
+
+    # Bad delta from cumulated payload size:
+    #
+    #   If the sum of delta get larger than K * target text length.
+    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
+        return False
+
+    # Bad delta from chain length:
+    #
+    #   If the number of delta in the chain gets too high.
+    if (revlog._maxchainlen
+            and revlog._maxchainlen < deltainfo.chainlen):
+        return False
+
+    # bad delta from intermediate snapshot size limit
+    #
+    #   If an intermediate snapshot size is higher than the limit.  The
+    #   limit exist to prevent endless chain of intermediate delta to be
+    #   created.
+    if (deltainfo.snapshotdepth is not None and
+            (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
+        return False
+
+    # bad delta if new intermediate snapshot is larger than the previous
+    # snapshot
+    if (deltainfo.snapshotdepth
+            and revlog.length(deltainfo.base) < deltainfo.deltalen):
+        return False
+
+    return True
+
 class deltacomputer(object):
     def __init__(self, revlog):
         self.revlog = revlog
@@ -632,7 +725,7 @@  class deltacomputer(object):
                 if revlog.flags(candidaterev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                     continue
                 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
-                if revlog._isgooddeltainfo(candidatedelta, revinfo):
+                if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
                     nominateddeltas.append(candidatedelta)
             if nominateddeltas:
                 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)