Patchwork [07,of,11,V2] revlogdelta: always return a delta info object in finddeltainfo

login
register
mail settings
Submitter Boris Feld
Date Aug. 27, 2018, 10:06 a.m.
Message ID <2c73ef87329fd8f0a515.1535364394@FB-lair>
Download mbox | patch
Permalink /patch/34071/
State Accepted
Headers show

Comments

Boris Feld - Aug. 27, 2018, 10:06 a.m.
# HG changeset patch
# User Boris Feld <boris.feld@octobus.net>
# Date 1534387137 -7200
#      Thu Aug 16 04:38:57 2018 +0200
# Node ID 2c73ef87329fd8f0a515dad9e2cd6de938615aa7
# Parent  050101b1b6db5bb9a6d6e887ec3145f8bff4188b
# EXP-Topic sparse-snapshot
# Available At https://bitbucket.org/octobus/mercurial-devel/
#              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 2c73ef87329f
revlogdelta: always return a delta info object in finddeltainfo

Previously, the method returned `None` when a full snapshot was needed. The
caller had to determine how to produce one itself.

In practice, building a `_deltainfo` object for a full snapshot is simple. So
we build it at the `finddeltainfo` level and always return a `_deltainfo`
object.

The caller can now simply process the `_deltainfo` return in all cases.
Gregory Szorc - Aug. 29, 2018, 4:52 p.m.
On Mon, Aug 27, 2018 at 3:06 AM Boris Feld <boris.feld@octobus.net> wrote:

> # HG changeset patch
> # User Boris Feld <boris.feld@octobus.net>
> # Date 1534387137 -7200
> #      Thu Aug 16 04:38:57 2018 +0200
> # Node ID 2c73ef87329fd8f0a515dad9e2cd6de938615aa7
> # Parent  050101b1b6db5bb9a6d6e887ec3145f8bff4188b
> # EXP-Topic sparse-snapshot
> # Available At https://bitbucket.org/octobus/mercurial-devel/
> #              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r
> 2c73ef87329f
> revlogdelta: always return a delta info object in finddeltainfo
>
> Previously, the method returned `None` when a full snapshot was needed. The
> caller had to determine how to produce one itself.
>
> In practice, building a `_deltainfo` object for a full snapshot is simple.
> So
> we build it at the `finddeltainfo` level and always return a `_deltainfo`
> object.
>
> The caller can now simply process the `_deltainfo` return in all cases.
>
> diff --git a/mercurial/revlog.py b/mercurial/revlog.py
> --- a/mercurial/revlog.py
> +++ b/mercurial/revlog.py
> @@ -1967,31 +1967,23 @@ class revlog(object):
>
>          deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
>
> -        if deltainfo is not None:
> -            base = deltainfo.base
> -            chainbase = deltainfo.chainbase
> -            data = deltainfo.data
> -            l = deltainfo.deltalen
> -        else:
> -            rawtext = deltacomputer.buildtext(revinfo, fh)
> -            data = self.compress(rawtext)
> -            l = len(data[1]) + len(data[0])
> -            base = chainbase = curr
> -
> -        e = (offset_type(offset, flags), l, textlen,
> -             base, link, p1r, p2r, node)
> +        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
> +             deltainfo.base, link, p1r, p2r, node)
>          self.index.append(e)
>          self.nodemap[node] = curr
>
>          entry = self._io.packentry(e, self.node, self.version, curr)
> -        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
> +        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
> +                         link, offset)
> +
> +        rawtext = btext[0]
>

This line gave me trouble during review. But that's only because the
existing code is hard to follow (we pass an array around then possibly
replace its first element so we can later use it in a cache). I would
happily review a follow-up patch that explicitly returns the rawtext from a
called function instead of mutating an array as a side-effect.


>
>          if alwayscache and rawtext is None:
>              rawtext = deltacomputer.buildtext(revinfo, fh)
>
>          if type(rawtext) == bytes: # only accept immutable objects
>              self._cache = (node, curr, rawtext)
> -        self._chainbasecache[curr] = chainbase
> +        self._chainbasecache[curr] = deltainfo.chainbase
>          return node
>
>      def _writeentry(self, transaction, ifh, dfh, entry, data, link,
> offset):
> diff --git a/mercurial/revlogutils/deltas.py
> b/mercurial/revlogutils/deltas.py
> --- a/mercurial/revlogutils/deltas.py
> +++ b/mercurial/revlogutils/deltas.py
> @@ -691,6 +691,19 @@ class deltacomputer(object):
>                            chainbase, chainlen, compresseddeltalen,
>                            snapshotdepth)
>
> +    def _fullsnapshotinfo(self, fh, revinfo):
> +        curr = len(self.revlog)
> +        rawtext = self.buildtext(revinfo, fh)
> +        data = self.revlog.compress(rawtext)
> +        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
> +        deltabase = chainbase = curr
> +        snapshotdepth = 0
> +        chainlen = 1
> +
> +        return _deltainfo(dist, deltalen, data, deltabase,
> +                          chainbase, chainlen, compresseddeltalen,
> +                          snapshotdepth)
> +
>      def finddeltainfo(self, revinfo, fh):
>          """Find an acceptable delta against a candidate revision
>
> @@ -700,15 +713,18 @@ class deltacomputer(object):
>
>          Returns the first acceptable candidate revision, as ordered by
>          _getcandidaterevs
> +
> +        If no suitable deltabase is found, we return delta info for a full
> +        snapshot.
>          """
>          if not revinfo.textlen:
> -            return None # empty file do not need delta
> +            return self._fullsnapshotinfo(fh, revinfo)
>
>          # no delta for flag processor revision (see "candelta" for why)
>          # not calling candelta since only one revision needs test, also to
>          # avoid overhead fetching flags again.
>          if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
> -            return None
> +            return self._fullsnapshotinfo(fh, revinfo)
>
>          cachedelta = revinfo.cachedelta
>          p1 = revinfo.p1
> @@ -743,4 +759,6 @@ class deltacomputer(object):
>                  deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
>                  break
>
> +        if deltainfo is None:
> +            deltainfo = self._fullsnapshotinfo(fh, revinfo)
>          return deltainfo
>

Patch

diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -1967,31 +1967,23 @@  class revlog(object):
 
         deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
 
-        if deltainfo is not None:
-            base = deltainfo.base
-            chainbase = deltainfo.chainbase
-            data = deltainfo.data
-            l = deltainfo.deltalen
-        else:
-            rawtext = deltacomputer.buildtext(revinfo, fh)
-            data = self.compress(rawtext)
-            l = len(data[1]) + len(data[0])
-            base = chainbase = curr
-
-        e = (offset_type(offset, flags), l, textlen,
-             base, link, p1r, p2r, node)
+        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
+             deltainfo.base, link, p1r, p2r, node)
         self.index.append(e)
         self.nodemap[node] = curr
 
         entry = self._io.packentry(e, self.node, self.version, curr)
-        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
+        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
+                         link, offset)
+
+        rawtext = btext[0]
 
         if alwayscache and rawtext is None:
             rawtext = deltacomputer.buildtext(revinfo, fh)
 
         if type(rawtext) == bytes: # only accept immutable objects
             self._cache = (node, curr, rawtext)
-        self._chainbasecache[curr] = chainbase
+        self._chainbasecache[curr] = deltainfo.chainbase
         return node
 
     def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
diff --git a/mercurial/revlogutils/deltas.py b/mercurial/revlogutils/deltas.py
--- a/mercurial/revlogutils/deltas.py
+++ b/mercurial/revlogutils/deltas.py
@@ -691,6 +691,19 @@  class deltacomputer(object):
                           chainbase, chainlen, compresseddeltalen,
                           snapshotdepth)
 
+    def _fullsnapshotinfo(self, fh, revinfo):
+        curr = len(self.revlog)
+        rawtext = self.buildtext(revinfo, fh)
+        data = self.revlog.compress(rawtext)
+        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
+        deltabase = chainbase = curr
+        snapshotdepth = 0
+        chainlen = 1
+
+        return _deltainfo(dist, deltalen, data, deltabase,
+                          chainbase, chainlen, compresseddeltalen,
+                          snapshotdepth)
+
     def finddeltainfo(self, revinfo, fh):
         """Find an acceptable delta against a candidate revision
 
@@ -700,15 +713,18 @@  class deltacomputer(object):
 
         Returns the first acceptable candidate revision, as ordered by
         _getcandidaterevs
+
+        If no suitable deltabase is found, we return delta info for a full
+        snapshot.
         """
         if not revinfo.textlen:
-            return None # empty file do not need delta
+            return self._fullsnapshotinfo(fh, revinfo)
 
         # no delta for flag processor revision (see "candelta" for why)
         # not calling candelta since only one revision needs test, also to
         # avoid overhead fetching flags again.
         if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
-            return None
+            return self._fullsnapshotinfo(fh, revinfo)
 
         cachedelta = revinfo.cachedelta
         p1 = revinfo.p1
@@ -743,4 +759,6 @@  class deltacomputer(object):
                 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                 break
 
+        if deltainfo is None:
+            deltainfo = self._fullsnapshotinfo(fh, revinfo)
         return deltainfo