@@ -88,6 +88,10 @@ from .utils import (
stringutil,
)
+from .revlogutils import (
+ deltas as deltautil
+)
+
release = lockmod.release
command = registrar.command()
@@ -706,7 +710,7 @@ def debugdeltachain(ui, repo, file_=None
largestblock = 0
srchunks = 0
- for revschunk in revlog._slicechunk(r, chain):
+ for revschunk in deltautil.slicechunk(r, chain):
srchunks += 1
blkend = start(revschunk[-1]) + length(revschunk[-1])
blksize = blkend - start(revschunk[0])
@@ -17,7 +17,6 @@ import collections
import contextlib
import errno
import hashlib
-import heapq
import os
import re
import struct
@@ -214,406 +213,6 @@ def hash(text, p1, p2):
s.update(text)
return s.digest()
-class _testrevlog(object):
- """minimalist fake revlog to use in doctests"""
-
- def __init__(self, data, density=0.5, mingap=0):
- """data is an list of revision payload boundaries"""
- self._data = data
- self._srdensitythreshold = density
- self._srmingapsize = mingap
-
- def start(self, rev):
- if rev == 0:
- return 0
- return self._data[rev - 1]
-
- def end(self, rev):
- return self._data[rev]
-
- def length(self, rev):
- return self.end(rev) - self.start(rev)
-
- def __len__(self):
- return len(self._data)
-
-def _trimchunk(revlog, revs, startidx, endidx=None):
- """returns revs[startidx:endidx] without empty trailing revs
-
- Doctest Setup
- >>> revlog = _testrevlog([
- ... 5, #0
- ... 10, #1
- ... 12, #2
- ... 12, #3 (empty)
- ... 17, #4
- ... 21, #5
- ... 21, #6 (empty)
- ... ])
-
- Contiguous cases:
- >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
- [0, 1, 2, 3, 4, 5]
- >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
- [0, 1, 2, 3, 4]
- >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
- [0, 1, 2]
- >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
- [2]
- >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
- [3, 4, 5]
- >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
- [3, 4]
-
- Discontiguous cases:
- >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
- [1, 3, 5]
- >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
- [1]
- >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
- [3, 5]
- >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
- [3, 5]
- """
- length = revlog.length
-
- if endidx is None:
- endidx = len(revs)
-
- # If we have a non-emtpy delta candidate, there are nothing to trim
- if revs[endidx - 1] < len(revlog):
- # Trim empty revs at the end, except the very first revision of a chain
- while (endidx > 1
- and endidx > startidx
- and length(revs[endidx - 1]) == 0):
- endidx -= 1
-
- return revs[startidx:endidx]
-
-def _segmentspan(revlog, revs, deltainfo=None):
- """Get the byte span of a segment of revisions
-
- revs is a sorted array of revision numbers
-
- >>> revlog = _testrevlog([
- ... 5, #0
- ... 10, #1
- ... 12, #2
- ... 12, #3 (empty)
- ... 17, #4
- ... ])
-
- >>> _segmentspan(revlog, [0, 1, 2, 3, 4])
- 17
- >>> _segmentspan(revlog, [0, 4])
- 17
- >>> _segmentspan(revlog, [3, 4])
- 5
- >>> _segmentspan(revlog, [1, 2, 3,])
- 7
- >>> _segmentspan(revlog, [1, 3])
- 7
- """
- if not revs:
- return 0
- if deltainfo is not None and len(revlog) <= revs[-1]:
- if len(revs) == 1:
- return deltainfo.deltalen
- offset = revlog.end(len(revlog) - 1)
- end = deltainfo.deltalen + offset
- else:
- end = revlog.end(revs[-1])
- return end - revlog.start(revs[0])
-
-def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
- """slice revs to reduce the amount of unrelated data to be read from disk.
-
- ``revs`` is sliced into groups that should be read in one time.
- Assume that revs are sorted.
-
- The initial chunk is sliced until the overall density (payload/chunks-span
- ratio) is above `revlog._srdensitythreshold`. No gap smaller than
- `revlog._srmingapsize` is skipped.
-
- If `targetsize` is set, no chunk larger than `targetsize` will be yield.
- For consistency with other slicing choice, this limit won't go lower than
- `revlog._srmingapsize`.
-
- If individual revisions chunk are larger than this limit, they will still
- be raised individually.
-
- >>> revlog = _testrevlog([
- ... 5, #00 (5)
- ... 10, #01 (5)
- ... 12, #02 (2)
- ... 12, #03 (empty)
- ... 27, #04 (15)
- ... 31, #05 (4)
- ... 31, #06 (empty)
- ... 42, #07 (11)
- ... 47, #08 (5)
- ... 47, #09 (empty)
- ... 48, #10 (1)
- ... 51, #11 (3)
- ... 74, #12 (23)
- ... 85, #13 (11)
- ... 86, #14 (1)
- ... 91, #15 (5)
- ... ])
-
- >>> list(_slicechunk(revlog, list(range(16))))
- [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
- >>> list(_slicechunk(revlog, [0, 15]))
- [[0], [15]]
- >>> list(_slicechunk(revlog, [0, 11, 15]))
- [[0], [11], [15]]
- >>> list(_slicechunk(revlog, [0, 11, 13, 15]))
- [[0], [11, 13, 15]]
- >>> list(_slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
- [[1, 2], [5, 8, 10, 11], [14]]
-
- Slicing with a maximum chunk size
- >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
- [[0], [11], [13], [15]]
- >>> list(_slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
- [[0], [11], [13, 15]]
- """
- if targetsize is not None:
- targetsize = max(targetsize, revlog._srmingapsize)
- # targetsize should not be specified when evaluating delta candidates:
- # * targetsize is used to ensure we stay within specification when reading,
- # * deltainfo is used to pick are good delta chain when writing.
- if not (deltainfo is None or targetsize is None):
- msg = 'cannot use `targetsize` with a `deltainfo`'
- raise error.ProgrammingError(msg)
- for chunk in _slicechunktodensity(revlog, revs,
- deltainfo,
- revlog._srdensitythreshold,
- revlog._srmingapsize):
- for subchunk in _slicechunktosize(revlog, chunk, targetsize):
- yield subchunk
-
-def _slicechunktosize(revlog, revs, targetsize=None):
- """slice revs to match the target size
-
- This is intended to be used on chunk that density slicing selected by that
- are still too large compared to the read garantee of revlog. This might
- happens when "minimal gap size" interrupted the slicing or when chain are
- built in a way that create large blocks next to each other.
-
- >>> revlog = _testrevlog([
- ... 3, #0 (3)
- ... 5, #1 (2)
- ... 6, #2 (1)
- ... 8, #3 (2)
- ... 8, #4 (empty)
- ... 11, #5 (3)
- ... 12, #6 (1)
- ... 13, #7 (1)
- ... 14, #8 (1)
- ... ])
-
- Cases where chunk is already small enough
- >>> list(_slicechunktosize(revlog, [0], 3))
- [[0]]
- >>> list(_slicechunktosize(revlog, [6, 7], 3))
- [[6, 7]]
- >>> list(_slicechunktosize(revlog, [0], None))
- [[0]]
- >>> list(_slicechunktosize(revlog, [6, 7], None))
- [[6, 7]]
-
- cases where we need actual slicing
- >>> list(_slicechunktosize(revlog, [0, 1], 3))
- [[0], [1]]
- >>> list(_slicechunktosize(revlog, [1, 3], 3))
- [[1], [3]]
- >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
- [[1, 2], [3]]
- >>> list(_slicechunktosize(revlog, [3, 5], 3))
- [[3], [5]]
- >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
- [[3], [5]]
- >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
- [[5], [6, 7, 8]]
- >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
- [[0], [1, 2], [3], [5], [6, 7, 8]]
-
- Case with too large individual chunk (must return valid chunk)
- >>> list(_slicechunktosize(revlog, [0, 1], 2))
- [[0], [1]]
- >>> list(_slicechunktosize(revlog, [1, 3], 1))
- [[1], [3]]
- >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
- [[3], [5]]
- """
- assert targetsize is None or 0 <= targetsize
- if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
- yield revs
- return
-
- startrevidx = 0
- startdata = revlog.start(revs[0])
- endrevidx = 0
- iterrevs = enumerate(revs)
- next(iterrevs) # skip first rev.
- for idx, r in iterrevs:
- span = revlog.end(r) - startdata
- if span <= targetsize:
- endrevidx = idx
- else:
- chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
- if chunk:
- yield chunk
- startrevidx = idx
- startdata = revlog.start(r)
- endrevidx = idx
- yield _trimchunk(revlog, revs, startrevidx)
-
-def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
- mingapsize=0):
- """slice revs to reduce the amount of unrelated data to be read from disk.
-
- ``revs`` is sliced into groups that should be read in one time.
- Assume that revs are sorted.
-
- ``deltainfo`` is a _deltainfo instance of a revision that we would append
- to the top of the revlog.
-
- The initial chunk is sliced until the overall density (payload/chunks-span
- ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
- skipped.
-
- >>> revlog = _testrevlog([
- ... 5, #00 (5)
- ... 10, #01 (5)
- ... 12, #02 (2)
- ... 12, #03 (empty)
- ... 27, #04 (15)
- ... 31, #05 (4)
- ... 31, #06 (empty)
- ... 42, #07 (11)
- ... 47, #08 (5)
- ... 47, #09 (empty)
- ... 48, #10 (1)
- ... 51, #11 (3)
- ... 74, #12 (23)
- ... 85, #13 (11)
- ... 86, #14 (1)
- ... 91, #15 (5)
- ... ])
-
- >>> list(_slicechunktodensity(revlog, list(range(16))))
- [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
- >>> list(_slicechunktodensity(revlog, [0, 15]))
- [[0], [15]]
- >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
- [[0], [11], [15]]
- >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
- [[0], [11, 13, 15]]
- >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
- [[1, 2], [5, 8, 10, 11], [14]]
- >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
- ... mingapsize=20))
- [[1, 2, 3, 5, 8, 10, 11], [14]]
- >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
- ... targetdensity=0.95))
- [[1, 2], [5], [8, 10, 11], [14]]
- >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
- ... targetdensity=0.95, mingapsize=12))
- [[1, 2], [5, 8, 10, 11], [14]]
- """
- start = revlog.start
- length = revlog.length
-
- if len(revs) <= 1:
- yield revs
- return
-
- nextrev = len(revlog)
- nextoffset = revlog.end(nextrev - 1)
-
- if deltainfo is None:
- deltachainspan = _segmentspan(revlog, revs)
- chainpayload = sum(length(r) for r in revs)
- else:
- deltachainspan = deltainfo.distance
- chainpayload = deltainfo.compresseddeltalen
-
- if deltachainspan < mingapsize:
- yield revs
- return
-
- readdata = deltachainspan
-
- if deltachainspan:
- density = chainpayload / float(deltachainspan)
- else:
- density = 1.0
-
- if density >= targetdensity:
- yield revs
- return
-
- if deltainfo is not None and deltainfo.deltalen:
- revs = list(revs)
- revs.append(nextrev)
-
- # Store the gaps in a heap to have them sorted by decreasing size
- gapsheap = []
- heapq.heapify(gapsheap)
- prevend = None
- for i, rev in enumerate(revs):
- if rev < nextrev:
- revstart = start(rev)
- revlen = length(rev)
- else:
- revstart = nextoffset
- revlen = deltainfo.deltalen
-
- # Skip empty revisions to form larger holes
- if revlen == 0:
- continue
-
- if prevend is not None:
- gapsize = revstart - prevend
- # only consider holes that are large enough
- if gapsize > mingapsize:
- heapq.heappush(gapsheap, (-gapsize, i))
-
- prevend = revstart + revlen
-
- # Collect the indices of the largest holes until the density is acceptable
- indicesheap = []
- heapq.heapify(indicesheap)
- while gapsheap and density < targetdensity:
- oppgapsize, gapidx = heapq.heappop(gapsheap)
-
- heapq.heappush(indicesheap, gapidx)
-
- # the gap sizes are stored as negatives to be sorted decreasingly
- # by the heap
- readdata -= (-oppgapsize)
- if readdata > 0:
- density = chainpayload / float(readdata)
- else:
- density = 1.0
-
- # Cut the revs at collected indices
- previdx = 0
- while indicesheap:
- idx = heapq.heappop(indicesheap)
-
- chunk = _trimchunk(revlog, revs, previdx, idx)
- if chunk:
- yield chunk
-
- previdx = idx
-
- chunk = _trimchunk(revlog, revs, previdx)
- if chunk:
- yield chunk
-
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
"""Information about a revision that allows building its fulltext
@@ -1894,7 +1493,8 @@ class revlog(object):
if not self._withsparseread:
slicedchunks = (revs,)
else:
- slicedchunks = _slicechunk(self, revs, targetsize=targetsize)
+ slicedchunks = deltautil.slicechunk(self, revs,
+ targetsize=targetsize)
for revschunk in slicedchunks:
firstrev = revschunk[0]
@@ -2344,8 +1944,9 @@ class revlog(object):
if not self.issnapshot(r):
break
deltachain = deltachain[idx:]
- chunks = _slicechunk(self, deltachain, deltainfo)
- all_span = [_segmentspan(self, revs, deltainfo) for revs in chunks]
+ chunks = deltautil.slicechunk(self, deltachain, deltainfo)
+ all_span = [deltautil.segmentspan(self, revs, deltainfo)
+ for revs in chunks]
distance = max(all_span)
else:
distance = deltainfo.distance
@@ -9,6 +9,7 @@
from __future__ import absolute_import
+import heapq
import struct
# import stuff from node for others to import from revlog
@@ -35,6 +36,406 @@ from .. import (
RevlogError = error.RevlogError
CensoredNodeError = error.CensoredNodeError
+class _testrevlog(object):
+ """minimalist fake revlog to use in doctests"""
+
+ def __init__(self, data, density=0.5, mingap=0):
+ """data is an list of revision payload boundaries"""
+ self._data = data
+ self._srdensitythreshold = density
+ self._srmingapsize = mingap
+
+ def start(self, rev):
+ if rev == 0:
+ return 0
+ return self._data[rev - 1]
+
+ def end(self, rev):
+ return self._data[rev]
+
+ def length(self, rev):
+ return self.end(rev) - self.start(rev)
+
+ def __len__(self):
+ return len(self._data)
+
+def slicechunk(revlog, revs, deltainfo=None, targetsize=None):
+ """slice revs to reduce the amount of unrelated data to be read from disk.
+
+ ``revs`` is sliced into groups that should be read in one time.
+ Assume that revs are sorted.
+
+ The initial chunk is sliced until the overall density (payload/chunks-span
+ ratio) is above `revlog._srdensitythreshold`. No gap smaller than
+ `revlog._srmingapsize` is skipped.
+
+ If `targetsize` is set, no chunk larger than `targetsize` will be yield.
+ For consistency with other slicing choice, this limit won't go lower than
+ `revlog._srmingapsize`.
+
+ If individual revisions chunk are larger than this limit, they will still
+ be raised individually.
+
+ >>> revlog = _testrevlog([
+ ... 5, #00 (5)
+ ... 10, #01 (5)
+ ... 12, #02 (2)
+ ... 12, #03 (empty)
+ ... 27, #04 (15)
+ ... 31, #05 (4)
+ ... 31, #06 (empty)
+ ... 42, #07 (11)
+ ... 47, #08 (5)
+ ... 47, #09 (empty)
+ ... 48, #10 (1)
+ ... 51, #11 (3)
+ ... 74, #12 (23)
+ ... 85, #13 (11)
+ ... 86, #14 (1)
+ ... 91, #15 (5)
+ ... ])
+
+ >>> list(slicechunk(revlog, list(range(16))))
+ [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
+ >>> list(slicechunk(revlog, [0, 15]))
+ [[0], [15]]
+ >>> list(slicechunk(revlog, [0, 11, 15]))
+ [[0], [11], [15]]
+ >>> list(slicechunk(revlog, [0, 11, 13, 15]))
+ [[0], [11, 13, 15]]
+ >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
+ [[1, 2], [5, 8, 10, 11], [14]]
+
+ Slicing with a maximum chunk size
+ >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
+ [[0], [11], [13], [15]]
+ >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
+ [[0], [11], [13, 15]]
+ """
+ if targetsize is not None:
+ targetsize = max(targetsize, revlog._srmingapsize)
+ # targetsize should not be specified when evaluating delta candidates:
+ # * targetsize is used to ensure we stay within specification when reading,
+ # * deltainfo is used to pick are good delta chain when writing.
+ if not (deltainfo is None or targetsize is None):
+ msg = 'cannot use `targetsize` with a `deltainfo`'
+ raise error.ProgrammingError(msg)
+ for chunk in _slicechunktodensity(revlog, revs,
+ deltainfo,
+ revlog._srdensitythreshold,
+ revlog._srmingapsize):
+ for subchunk in _slicechunktosize(revlog, chunk, targetsize):
+ yield subchunk
+
+def _slicechunktosize(revlog, revs, targetsize=None):
+ """slice revs to match the target size
+
+ This is intended to be used on chunk that density slicing selected by that
+ are still too large compared to the read garantee of revlog. This might
+ happens when "minimal gap size" interrupted the slicing or when chain are
+ built in a way that create large blocks next to each other.
+
+ >>> revlog = _testrevlog([
+ ... 3, #0 (3)
+ ... 5, #1 (2)
+ ... 6, #2 (1)
+ ... 8, #3 (2)
+ ... 8, #4 (empty)
+ ... 11, #5 (3)
+ ... 12, #6 (1)
+ ... 13, #7 (1)
+ ... 14, #8 (1)
+ ... ])
+
+ Cases where chunk is already small enough
+ >>> list(_slicechunktosize(revlog, [0], 3))
+ [[0]]
+ >>> list(_slicechunktosize(revlog, [6, 7], 3))
+ [[6, 7]]
+ >>> list(_slicechunktosize(revlog, [0], None))
+ [[0]]
+ >>> list(_slicechunktosize(revlog, [6, 7], None))
+ [[6, 7]]
+
+ cases where we need actual slicing
+ >>> list(_slicechunktosize(revlog, [0, 1], 3))
+ [[0], [1]]
+ >>> list(_slicechunktosize(revlog, [1, 3], 3))
+ [[1], [3]]
+ >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
+ [[1, 2], [3]]
+ >>> list(_slicechunktosize(revlog, [3, 5], 3))
+ [[3], [5]]
+ >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
+ [[3], [5]]
+ >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
+ [[5], [6, 7, 8]]
+ >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
+ [[0], [1, 2], [3], [5], [6, 7, 8]]
+
+ Case with too large individual chunk (must return valid chunk)
+ >>> list(_slicechunktosize(revlog, [0, 1], 2))
+ [[0], [1]]
+ >>> list(_slicechunktosize(revlog, [1, 3], 1))
+ [[1], [3]]
+ >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
+ [[3], [5]]
+ """
+ assert targetsize is None or 0 <= targetsize
+ if targetsize is None or segmentspan(revlog, revs) <= targetsize:
+ yield revs
+ return
+
+ startrevidx = 0
+ startdata = revlog.start(revs[0])
+ endrevidx = 0
+ iterrevs = enumerate(revs)
+ next(iterrevs) # skip first rev.
+ for idx, r in iterrevs:
+ span = revlog.end(r) - startdata
+ if span <= targetsize:
+ endrevidx = idx
+ else:
+ chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
+ if chunk:
+ yield chunk
+ startrevidx = idx
+ startdata = revlog.start(r)
+ endrevidx = idx
+ yield _trimchunk(revlog, revs, startrevidx)
+
+def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
+ mingapsize=0):
+ """slice revs to reduce the amount of unrelated data to be read from disk.
+
+ ``revs`` is sliced into groups that should be read in one time.
+ Assume that revs are sorted.
+
+ ``deltainfo`` is a _deltainfo instance of a revision that we would append
+ to the top of the revlog.
+
+ The initial chunk is sliced until the overall density (payload/chunks-span
+ ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
+ skipped.
+
+ >>> revlog = _testrevlog([
+ ... 5, #00 (5)
+ ... 10, #01 (5)
+ ... 12, #02 (2)
+ ... 12, #03 (empty)
+ ... 27, #04 (15)
+ ... 31, #05 (4)
+ ... 31, #06 (empty)
+ ... 42, #07 (11)
+ ... 47, #08 (5)
+ ... 47, #09 (empty)
+ ... 48, #10 (1)
+ ... 51, #11 (3)
+ ... 74, #12 (23)
+ ... 85, #13 (11)
+ ... 86, #14 (1)
+ ... 91, #15 (5)
+ ... ])
+
+ >>> list(_slicechunktodensity(revlog, list(range(16))))
+ [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
+ >>> list(_slicechunktodensity(revlog, [0, 15]))
+ [[0], [15]]
+ >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
+ [[0], [11], [15]]
+ >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
+ [[0], [11, 13, 15]]
+ >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
+ [[1, 2], [5, 8, 10, 11], [14]]
+ >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
+ ... mingapsize=20))
+ [[1, 2, 3, 5, 8, 10, 11], [14]]
+ >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
+ ... targetdensity=0.95))
+ [[1, 2], [5], [8, 10, 11], [14]]
+ >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
+ ... targetdensity=0.95, mingapsize=12))
+ [[1, 2], [5, 8, 10, 11], [14]]
+ """
+ start = revlog.start
+ length = revlog.length
+
+ if len(revs) <= 1:
+ yield revs
+ return
+
+ nextrev = len(revlog)
+ nextoffset = revlog.end(nextrev - 1)
+
+ if deltainfo is None:
+ deltachainspan = segmentspan(revlog, revs)
+ chainpayload = sum(length(r) for r in revs)
+ else:
+ deltachainspan = deltainfo.distance
+ chainpayload = deltainfo.compresseddeltalen
+
+ if deltachainspan < mingapsize:
+ yield revs
+ return
+
+ readdata = deltachainspan
+
+ if deltachainspan:
+ density = chainpayload / float(deltachainspan)
+ else:
+ density = 1.0
+
+ if density >= targetdensity:
+ yield revs
+ return
+
+ if deltainfo is not None and deltainfo.deltalen:
+ revs = list(revs)
+ revs.append(nextrev)
+
+ # Store the gaps in a heap to have them sorted by decreasing size
+ gapsheap = []
+ heapq.heapify(gapsheap)
+ prevend = None
+ for i, rev in enumerate(revs):
+ if rev < nextrev:
+ revstart = start(rev)
+ revlen = length(rev)
+ else:
+ revstart = nextoffset
+ revlen = deltainfo.deltalen
+
+ # Skip empty revisions to form larger holes
+ if revlen == 0:
+ continue
+
+ if prevend is not None:
+ gapsize = revstart - prevend
+ # only consider holes that are large enough
+ if gapsize > mingapsize:
+ heapq.heappush(gapsheap, (-gapsize, i))
+
+ prevend = revstart + revlen
+
+ # Collect the indices of the largest holes until the density is acceptable
+ indicesheap = []
+ heapq.heapify(indicesheap)
+ while gapsheap and density < targetdensity:
+ oppgapsize, gapidx = heapq.heappop(gapsheap)
+
+ heapq.heappush(indicesheap, gapidx)
+
+ # the gap sizes are stored as negatives to be sorted decreasingly
+ # by the heap
+ readdata -= (-oppgapsize)
+ if readdata > 0:
+ density = chainpayload / float(readdata)
+ else:
+ density = 1.0
+
+ # Cut the revs at collected indices
+ previdx = 0
+ while indicesheap:
+ idx = heapq.heappop(indicesheap)
+
+ chunk = _trimchunk(revlog, revs, previdx, idx)
+ if chunk:
+ yield chunk
+
+ previdx = idx
+
+ chunk = _trimchunk(revlog, revs, previdx)
+ if chunk:
+ yield chunk
+
+def _trimchunk(revlog, revs, startidx, endidx=None):
+ """returns revs[startidx:endidx] without empty trailing revs
+
+ Doctest Setup
+ >>> revlog = _testrevlog([
+ ... 5, #0
+ ... 10, #1
+ ... 12, #2
+ ... 12, #3 (empty)
+ ... 17, #4
+ ... 21, #5
+ ... 21, #6 (empty)
+ ... ])
+
+ Contiguous cases:
+ >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
+ [0, 1, 2, 3, 4, 5]
+ >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
+ [0, 1, 2, 3, 4]
+ >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
+ [0, 1, 2]
+ >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
+ [2]
+ >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
+ [3, 4, 5]
+ >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
+ [3, 4]
+
+ Discontiguous cases:
+ >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
+ [1, 3, 5]
+ >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
+ [1]
+ >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
+ [3, 5]
+ >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
+ [3, 5]
+ """
+ length = revlog.length
+
+ if endidx is None:
+ endidx = len(revs)
+
+ # If we have a non-emtpy delta candidate, there are nothing to trim
+ if revs[endidx - 1] < len(revlog):
+ # Trim empty revs at the end, except the very first revision of a chain
+ while (endidx > 1
+ and endidx > startidx
+ and length(revs[endidx - 1]) == 0):
+ endidx -= 1
+
+ return revs[startidx:endidx]
+
+def segmentspan(revlog, revs, deltainfo=None):
+ """Get the byte span of a segment of revisions
+
+ revs is a sorted array of revision numbers
+
+ >>> revlog = _testrevlog([
+ ... 5, #0
+ ... 10, #1
+ ... 12, #2
+ ... 12, #3 (empty)
+ ... 17, #4
+ ... ])
+
+ >>> segmentspan(revlog, [0, 1, 2, 3, 4])
+ 17
+ >>> segmentspan(revlog, [0, 4])
+ 17
+ >>> segmentspan(revlog, [3, 4])
+ 5
+ >>> segmentspan(revlog, [1, 2, 3,])
+ 7
+ >>> segmentspan(revlog, [1, 3])
+ 7
+ """
+ if not revs:
+ return 0
+ if deltainfo is not None and len(revlog) <= revs[-1]:
+ if len(revs) == 1:
+ return deltainfo.deltalen
+ offset = revlog.end(len(revlog) - 1)
+ end = deltainfo.deltalen + offset
+ else:
+ end = revlog.end(revs[-1])
+ return end - revlog.start(revs[0])
+
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
distance = attr.ib()