Patchwork [STABLE] sparse-revlog: fix delta validity computation

login
register
mail settings
Submitter Boris Feld
Date Aug. 16, 2018, 9:01 a.m.
Message ID <1b78bebb7a8793b4b858.1534410071@FB-lair>
Download mbox | patch
Permalink /patch/33769/
State Accepted
Headers show

Comments

Boris Feld - Aug. 16, 2018, 9:01 a.m.
# HG changeset patch
# User Boris Feld <boris.feld@octobus.net>
# Date 1534337020 -7200
#      Wed Aug 15 14:43:40 2018 +0200
# Branch stable
# Node ID 1b78bebb7a8793b4b858d335b11f4deaf57dd128
# Parent  7e023ce26c7f5e800c778fb4ff76c6d7726666b3
# EXP-Topic fix-sparse
# Available At https://bitbucket.org/octobus/mercurial-devel/
#              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 1b78bebb7a87
sparse-revlog: fix delta validity computation

When considering the validity of a delta with sparse-revlog, we check the size
of the largest read. To do so, we use some regular logic with the extra delta
information. Some of this logic was not handling this extra delta properly,
confusing it with "nullrev". This confusion with nullrev lead to wrong results
for this computation but preventing a crash.

Changeset 781b2720d2ac on default revealed this error, crashing. This
changeset fixes the logic on stable so that the computation is correct (and
the crash is averted).

The fix is made on stable as this will impact 4.7 clients interacting with
sparse-revlog repositories (eg: created by later version).
Gregory Szorc - Aug. 17, 2018, 10:50 p.m.
On Thu, Aug 16, 2018 at 2:01 AM, Boris Feld <boris.feld@octobus.net> wrote:

> # HG changeset patch
> # User Boris Feld <boris.feld@octobus.net>
> # Date 1534337020 -7200
> #      Wed Aug 15 14:43:40 2018 +0200
> # Branch stable
> # Node ID 1b78bebb7a8793b4b858d335b11f4deaf57dd128
> # Parent  7e023ce26c7f5e800c778fb4ff76c6d7726666b3
> # EXP-Topic fix-sparse
> # Available At https://bitbucket.org/octobus/mercurial-devel/
> #              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r
> 1b78bebb7a87
> sparse-revlog: fix delta validity computation
>
> When considering the validity of a delta with sparse-revlog, we check the
> size
> of the largest read. To do so, we use some regular logic with the extra
> delta
> information. Some of this logic was not handling this extra delta properly,
> confusing it with "nullrev". This confusion with nullrev lead to wrong
> results
> for this computation but preventing a crash.
>
> Changeset 781b2720d2ac on default revealed this error, crashing. This
> changeset fixes the logic on stable so that the computation is correct (and
> the crash is averted).
>
> The fix is made on stable as this will impact 4.7 clients interacting with
> sparse-revlog repositories (eg: created by later version).
>

Queued for stable.


>
> diff --git a/mercurial/revlog.py b/mercurial/revlog.py
> --- a/mercurial/revlog.py
> +++ b/mercurial/revlog.py
> @@ -262,13 +262,17 @@ def _trimchunk(revlog, revs, startidx, e
>      if endidx is None:
>          endidx = len(revs)
>
> -    # Trim empty revs at the end, but never the very first revision of a
> chain
> -    while endidx > 1 and endidx > startidx and length(revs[endidx - 1])
> == 0:
> -        endidx -= 1
> +    # If we have a non-emtpy delta candidate, there are nothing to trim
> +    if revs[endidx - 1] < len(revlog):
> +        # Trim empty revs at the end, except the very first revision of a
> chain
> +        while (endidx > 1
> +                and endidx > startidx
> +                and length(revs[endidx - 1]) == 0):
> +            endidx -= 1
>
>      return revs[startidx:endidx]
>
> -def _segmentspan(revlog, revs):
> +def _segmentspan(revlog, revs, deltainfo=None):
>      """Get the byte span of a segment of revisions
>
>      revs is a sorted array of revision numbers
> @@ -294,7 +298,14 @@ def _segmentspan(revlog, revs):
>      """
>      if not revs:
>          return 0
> -    return revlog.end(revs[-1]) - revlog.start(revs[0])
> +    if deltainfo is not None and len(revlog) <= revs[-1]:
> +        if len(revs) == 1:
> +            return deltainfo.deltalen
> +        offset = revlog.end(len(revlog) - 1)
> +        end = deltainfo.deltalen + offset
> +    else:
> +        end = revlog.end(revs[-1])
> +    return end - revlog.start(revs[0])
>
>  def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
>      """slice revs to reduce the amount of unrelated data to be read from
> disk.
> @@ -526,7 +537,7 @@ def _slicechunktodensity(revlog, revs, d
>          yield revs
>          return
>
> -    if deltainfo is not None:
> +    if deltainfo is not None and deltainfo.deltalen:
>          revs = list(revs)
>          revs.append(nextrev)
>
> @@ -2444,7 +2455,8 @@ class revlog(object):
>                  deltachain = []
>
>              chunks = _slicechunk(self, deltachain, deltainfo)
> -            distance = max(map(lambda revs:_segmentspan(self, revs),
> chunks))
> +            all_span = [_segmentspan(self, revs, deltainfo) for revs in
> chunks]
> +            distance = max(all_span)
>          else:
>              distance = deltainfo.distance
>
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
>

Patch

diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -262,13 +262,17 @@  def _trimchunk(revlog, revs, startidx, e
     if endidx is None:
         endidx = len(revs)
 
-    # Trim empty revs at the end, but never the very first revision of a chain
-    while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
-        endidx -= 1
+    # If we have a non-emtpy delta candidate, there are nothing to trim
+    if revs[endidx - 1] < len(revlog):
+        # Trim empty revs at the end, except the very first revision of a chain
+        while (endidx > 1
+                and endidx > startidx
+                and length(revs[endidx - 1]) == 0):
+            endidx -= 1
 
     return revs[startidx:endidx]
 
-def _segmentspan(revlog, revs):
+def _segmentspan(revlog, revs, deltainfo=None):
     """Get the byte span of a segment of revisions
 
     revs is a sorted array of revision numbers
@@ -294,7 +298,14 @@  def _segmentspan(revlog, revs):
     """
     if not revs:
         return 0
-    return revlog.end(revs[-1]) - revlog.start(revs[0])
+    if deltainfo is not None and len(revlog) <= revs[-1]:
+        if len(revs) == 1:
+            return deltainfo.deltalen
+        offset = revlog.end(len(revlog) - 1)
+        end = deltainfo.deltalen + offset
+    else:
+        end = revlog.end(revs[-1])
+    return end - revlog.start(revs[0])
 
 def _slicechunk(revlog, revs, deltainfo=None, targetsize=None):
     """slice revs to reduce the amount of unrelated data to be read from disk.
@@ -526,7 +537,7 @@  def _slicechunktodensity(revlog, revs, d
         yield revs
         return
 
-    if deltainfo is not None:
+    if deltainfo is not None and deltainfo.deltalen:
         revs = list(revs)
         revs.append(nextrev)
 
@@ -2444,7 +2455,8 @@  class revlog(object):
                 deltachain = []
 
             chunks = _slicechunk(self, deltachain, deltainfo)
-            distance = max(map(lambda revs:_segmentspan(self, revs), chunks))
+            all_span = [_segmentspan(self, revs, deltainfo) for revs in chunks]
+            distance = max(all_span)
         else:
             distance = deltainfo.distance