Patchwork [11,of,13] revlog: add function to slice chunk down to a given size

login
register
mail settings
Submitter Boris Feld
Date July 10, 2018, 1:27 p.m.
Message ID <068687c4911133c6f9d3.1531229240@FB-lair>
Download mbox | patch
Permalink /patch/32750/
State Accepted
Headers show

Comments

Boris Feld - July 10, 2018, 1:27 p.m.
# HG changeset patch
# User Boris Feld <boris.feld@octobus.net>
# Date 1531216590 -7200
#      Tue Jul 10 11:56:30 2018 +0200
# Node ID 068687c4911133c6f9d3f5c103634f7648305cc8
# Parent  bdc253d8f2cfdbe1a8977a2b330a3950b108b036
# EXP-Topic write-for-sparse-read
# Available At https://bitbucket.org/octobus/mercurial-devel/
#              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 068687c49111
revlog: add function to slice chunk down to a given size

It is possible to encounter situations where the slicing based on density did
not achieve chunk smaller than the 4*textlength limit. To avoid extra memory
consumption in those cases, we need to be able to break down chunk to a given
size. Actual caller comes in the next changesets.
Boris Feld - July 11, 2018, 7:29 a.m.
It looks like the tests are broken because of one of the in-flight change:

     diff --git a/mercurial/revlog.py b/mercurial/revlog.py
     --- a/mercurial/revlog.py
     +++ b/mercurial/revlog.py
     @@ -416,8 +416,7 @@ def _slicechunktosize(revlog, revs, targ
          startrevidx = 0
          startdata = revlog.start(revs[0])
          endrevidx = 0
     -    iterrevs = enumerate(revs)
     -    next(iterrevs) # skip first rev
     +    iterrevs = enumerate(revs, 1)
          for idx, r in iterrevs:
              span = revlog.end(r) - startdata
              if span <= targetsize:


I tried reverting this change and the tests are green again, should I 
send a follow-up or could we amend the draft changeset?


On 10/07/2018 15:27, Boris Feld wrote:
> # HG changeset patch
> # User Boris Feld <boris.feld@octobus.net>
> # Date 1531216590 -7200
> #      Tue Jul 10 11:56:30 2018 +0200
> # Node ID 068687c4911133c6f9d3f5c103634f7648305cc8
> # Parent  bdc253d8f2cfdbe1a8977a2b330a3950b108b036
> # EXP-Topic write-for-sparse-read
> # Available At https://bitbucket.org/octobus/mercurial-devel/
> #              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 068687c49111
> revlog: add function to slice chunk down to a given size
>
> It is possible to encounter situations where the slicing based on density did
> not achieve chunk smaller than the 4*textlength limit. To avoid extra memory
> consumption in those cases, we need to be able to break down chunk to a given
> size. Actual caller comes in the next changesets.
>
> diff --git a/mercurial/revlog.py b/mercurial/revlog.py
> --- a/mercurial/revlog.py
> +++ b/mercurial/revlog.py
> @@ -338,6 +338,83 @@ def _slicechunk(revlog, revs):
>                                         revlog._srmingapsize):
>           yield chunk
>   
> +def _slicechunktosize(revlog, revs, targetsize):
> +    """slice revs to match the target size
> +
> +    This is intended to be used on chunk that density slicing selected by that
> +    are still too large compared to the read garantee of revlog. This might
> +    happens when "minimal gap size" interrupted the slicing or when chain are
> +    built in a way that create large blocks next to each other.
> +
> +    >>> revlog = _testrevlog([
> +    ...  3,  #0 (3)
> +    ...  5,  #1 (2)
> +    ...  6,  #2 (1)
> +    ...  8,  #3 (2)
> +    ...  8,  #4 (empty)
> +    ...  11, #5 (3)
> +    ...  12, #6 (1)
> +    ...  13, #7 (1)
> +    ...  14, #8 (1)
> +    ... ])
> +
> +    Cases where chunk is already small enough
> +    >>> list(_slicechunktosize(revlog, [0], 3))
> +    [[0]]
> +    >>> list(_slicechunktosize(revlog, [6, 7], 3))
> +    [[6, 7]]
> +    >>> list(_slicechunktosize(revlog, [0], None))
> +    [[0]]
> +    >>> list(_slicechunktosize(revlog, [6, 7], None))
> +    [[6, 7]]
> +
> +    cases where we need actual slicing
> +    >>> list(_slicechunktosize(revlog, [0, 1], 3))
> +    [[0], [1]]
> +    >>> list(_slicechunktosize(revlog, [1, 3], 3))
> +    [[1], [3]]
> +    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
> +    [[1, 2], [3]]
> +    >>> list(_slicechunktosize(revlog, [3, 5], 3))
> +    [[3], [5]]
> +    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
> +    [[3], [5]]
> +    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
> +    [[5], [6, 7, 8]]
> +    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
> +    [[0], [1, 2], [3], [5], [6, 7, 8]]
> +
> +    Case with too large individual chunk (must return valid chunk)
> +    >>> list(_slicechunktosize(revlog, [0, 1], 2))
> +    [[0], [1]]
> +    >>> list(_slicechunktosize(revlog, [1, 3], 1))
> +    [[1], [3]]
> +    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
> +    [[3], [5]]
> +    """
> +    assert targetsize is None or 0 <= targetsize
> +    if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
> +        yield revs
> +        return
> +
> +    startrevidx = 0
> +    startdata = revlog.start(revs[0])
> +    endrevidx = 0
> +    iterrevs = enumerate(revs)
> +    next(iterrevs) # skip first rev
> +    for idx, r in iterrevs:
> +        span = revlog.end(r) - startdata
> +        if span <= targetsize:
> +            endrevidx = idx
> +        else:
> +            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
> +            if chunk:
> +                yield chunk
> +            startrevidx = idx
> +            startdata = revlog.start(r)
> +            endrevidx = idx
> +    yield _trimchunk(revlog, revs, startrevidx)
> +
>   def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
>       """slice revs to reduce the amount of unrelated data to be read from disk.
>   
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Gregory Szorc - July 11, 2018, 7:37 a.m.
On Wed, Jul 11, 2018 at 12:29 AM, Boris FELD <boris.feld@octobus.net> wrote:

> It looks like the tests are broken because of one of the in-flight change:
>
>     diff --git a/mercurial/revlog.py b/mercurial/revlog.py
>     --- a/mercurial/revlog.py
>     +++ b/mercurial/revlog.py
>     @@ -416,8 +416,7 @@ def _slicechunktosize(revlog, revs, targ
>          startrevidx = 0
>          startdata = revlog.start(revs[0])
>          endrevidx = 0
>     -    iterrevs = enumerate(revs)
>     -    next(iterrevs) # skip first rev
>     +    iterrevs = enumerate(revs, 1)
>          for idx, r in iterrevs:
>              span = revlog.end(r) - startdata
>              if span <= targetsize:
>
>
> I tried reverting this change and the tests are green again, should I send
> a follow-up or could we amend the draft changeset?


I amended the top 3 changesets on hg-committed to use the original version.


>
>
>
> On 10/07/2018 15:27, Boris Feld wrote:
>
>> # HG changeset patch
>> # User Boris Feld <boris.feld@octobus.net>
>> # Date 1531216590 -7200
>> #      Tue Jul 10 11:56:30 2018 +0200
>> # Node ID 068687c4911133c6f9d3f5c103634f7648305cc8
>> # Parent  bdc253d8f2cfdbe1a8977a2b330a3950b108b036
>> # EXP-Topic write-for-sparse-read
>> # Available At https://bitbucket.org/octobus/mercurial-devel/
>> #              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r
>> 068687c49111
>> revlog: add function to slice chunk down to a given size
>>
>> It is possible to encounter situations where the slicing based on density
>> did
>> not achieve chunk smaller than the 4*textlength limit. To avoid extra
>> memory
>> consumption in those cases, we need to be able to break down chunk to a
>> given
>> size. Actual caller comes in the next changesets.
>>
>> diff --git a/mercurial/revlog.py b/mercurial/revlog.py
>> --- a/mercurial/revlog.py
>> +++ b/mercurial/revlog.py
>> @@ -338,6 +338,83 @@ def _slicechunk(revlog, revs):
>>                                         revlog._srmingapsize):
>>           yield chunk
>>   +def _slicechunktosize(revlog, revs, targetsize):
>> +    """slice revs to match the target size
>> +
>> +    This is intended to be used on chunk that density slicing selected
>> by that
>> +    are still too large compared to the read garantee of revlog. This
>> might
>> +    happens when "minimal gap size" interrupted the slicing or when
>> chain are
>> +    built in a way that create large blocks next to each other.
>> +
>> +    >>> revlog = _testrevlog([
>> +    ...  3,  #0 (3)
>> +    ...  5,  #1 (2)
>> +    ...  6,  #2 (1)
>> +    ...  8,  #3 (2)
>> +    ...  8,  #4 (empty)
>> +    ...  11, #5 (3)
>> +    ...  12, #6 (1)
>> +    ...  13, #7 (1)
>> +    ...  14, #8 (1)
>> +    ... ])
>> +
>> +    Cases where chunk is already small enough
>> +    >>> list(_slicechunktosize(revlog, [0], 3))
>> +    [[0]]
>> +    >>> list(_slicechunktosize(revlog, [6, 7], 3))
>> +    [[6, 7]]
>> +    >>> list(_slicechunktosize(revlog, [0], None))
>> +    [[0]]
>> +    >>> list(_slicechunktosize(revlog, [6, 7], None))
>> +    [[6, 7]]
>> +
>> +    cases where we need actual slicing
>> +    >>> list(_slicechunktosize(revlog, [0, 1], 3))
>> +    [[0], [1]]
>> +    >>> list(_slicechunktosize(revlog, [1, 3], 3))
>> +    [[1], [3]]
>> +    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
>> +    [[1, 2], [3]]
>> +    >>> list(_slicechunktosize(revlog, [3, 5], 3))
>> +    [[3], [5]]
>> +    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
>> +    [[3], [5]]
>> +    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
>> +    [[5], [6, 7, 8]]
>> +    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
>> +    [[0], [1, 2], [3], [5], [6, 7, 8]]
>> +
>> +    Case with too large individual chunk (must return valid chunk)
>> +    >>> list(_slicechunktosize(revlog, [0, 1], 2))
>> +    [[0], [1]]
>> +    >>> list(_slicechunktosize(revlog, [1, 3], 1))
>> +    [[1], [3]]
>> +    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
>> +    [[3], [5]]
>> +    """
>> +    assert targetsize is None or 0 <= targetsize
>> +    if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
>> +        yield revs
>> +        return
>> +
>> +    startrevidx = 0
>> +    startdata = revlog.start(revs[0])
>> +    endrevidx = 0
>> +    iterrevs = enumerate(revs)
>> +    next(iterrevs) # skip first rev
>> +    for idx, r in iterrevs:
>> +        span = revlog.end(r) - startdata
>> +        if span <= targetsize:
>> +            endrevidx = idx
>> +        else:
>> +            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
>> +            if chunk:
>> +                yield chunk
>> +            startrevidx = idx
>> +            startdata = revlog.start(r)
>> +            endrevidx = idx
>> +    yield _trimchunk(revlog, revs, startrevidx)
>> +
>>   def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
>>       """slice revs to reduce the amount of unrelated data to be read
>> from disk.
>>   _______________________________________________
>> Mercurial-devel mailing list
>> Mercurial-devel@mercurial-scm.org
>> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
>>
>
>

Patch

diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -338,6 +338,83 @@  def _slicechunk(revlog, revs):
                                       revlog._srmingapsize):
         yield chunk
 
+def _slicechunktosize(revlog, revs, targetsize):
+    """slice revs to match the target size
+
+    This is intended to be used on chunk that density slicing selected by that
+    are still too large compared to the read garantee of revlog. This might
+    happens when "minimal gap size" interrupted the slicing or when chain are
+    built in a way that create large blocks next to each other.
+
+    >>> revlog = _testrevlog([
+    ...  3,  #0 (3)
+    ...  5,  #1 (2)
+    ...  6,  #2 (1)
+    ...  8,  #3 (2)
+    ...  8,  #4 (empty)
+    ...  11, #5 (3)
+    ...  12, #6 (1)
+    ...  13, #7 (1)
+    ...  14, #8 (1)
+    ... ])
+
+    Cases where chunk is already small enough
+    >>> list(_slicechunktosize(revlog, [0], 3))
+    [[0]]
+    >>> list(_slicechunktosize(revlog, [6, 7], 3))
+    [[6, 7]]
+    >>> list(_slicechunktosize(revlog, [0], None))
+    [[0]]
+    >>> list(_slicechunktosize(revlog, [6, 7], None))
+    [[6, 7]]
+
+    cases where we need actual slicing
+    >>> list(_slicechunktosize(revlog, [0, 1], 3))
+    [[0], [1]]
+    >>> list(_slicechunktosize(revlog, [1, 3], 3))
+    [[1], [3]]
+    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
+    [[1, 2], [3]]
+    >>> list(_slicechunktosize(revlog, [3, 5], 3))
+    [[3], [5]]
+    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
+    [[3], [5]]
+    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
+    [[5], [6, 7, 8]]
+    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
+    [[0], [1, 2], [3], [5], [6, 7, 8]]
+
+    Case with too large individual chunk (must return valid chunk)
+    >>> list(_slicechunktosize(revlog, [0, 1], 2))
+    [[0], [1]]
+    >>> list(_slicechunktosize(revlog, [1, 3], 1))
+    [[1], [3]]
+    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
+    [[3], [5]]
+    """
+    assert targetsize is None or 0 <= targetsize
+    if targetsize is None or _segmentspan(revlog, revs) <= targetsize:
+        yield revs
+        return
+
+    startrevidx = 0
+    startdata = revlog.start(revs[0])
+    endrevidx = 0
+    iterrevs = enumerate(revs)
+    next(iterrevs) # skip first rev
+    for idx, r in iterrevs:
+        span = revlog.end(r) - startdata
+        if span <= targetsize:
+            endrevidx = idx
+        else:
+            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
+            if chunk:
+                yield chunk
+            startrevidx = idx
+            startdata = revlog.start(r)
+            endrevidx = idx
+    yield _trimchunk(revlog, revs, startrevidx)
+
 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
     """slice revs to reduce the amount of unrelated data to be read from disk.