Submitter | Boris Feld |
---|---|
Date | Aug. 27, 2018, 10:06 a.m. |
Message ID | <2c73ef87329fd8f0a515.1535364394@FB-lair> |
Download | mbox | patch |
Permalink | /patch/34071/ |
State | Accepted |
Headers | show |
Comments
On Mon, Aug 27, 2018 at 3:06 AM Boris Feld <boris.feld@octobus.net> wrote: > # HG changeset patch > # User Boris Feld <boris.feld@octobus.net> > # Date 1534387137 -7200 > # Thu Aug 16 04:38:57 2018 +0200 > # Node ID 2c73ef87329fd8f0a515dad9e2cd6de938615aa7 > # Parent 050101b1b6db5bb9a6d6e887ec3145f8bff4188b > # EXP-Topic sparse-snapshot > # Available At https://bitbucket.org/octobus/mercurial-devel/ > # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r > 2c73ef87329f > revlogdelta: always return a delta info object in finddeltainfo > > Previously, the method returned `None` when a full snapshot was needed. The > caller had to determine how to produce one itself. > > In practice, building a `_deltainfo` object for a full snapshot is simple. > So > we build it at the `finddeltainfo` level and always return a `_deltainfo` > object. > > The caller can now simply process the `_deltainfo` return in all cases. > > diff --git a/mercurial/revlog.py b/mercurial/revlog.py > --- a/mercurial/revlog.py > +++ b/mercurial/revlog.py > @@ -1967,31 +1967,23 @@ class revlog(object): > > deltainfo = deltacomputer.finddeltainfo(revinfo, fh) > > - if deltainfo is not None: > - base = deltainfo.base > - chainbase = deltainfo.chainbase > - data = deltainfo.data > - l = deltainfo.deltalen > - else: > - rawtext = deltacomputer.buildtext(revinfo, fh) > - data = self.compress(rawtext) > - l = len(data[1]) + len(data[0]) > - base = chainbase = curr > - > - e = (offset_type(offset, flags), l, textlen, > - base, link, p1r, p2r, node) > + e = (offset_type(offset, flags), deltainfo.deltalen, textlen, > + deltainfo.base, link, p1r, p2r, node) > self.index.append(e) > self.nodemap[node] = curr > > entry = self._io.packentry(e, self.node, self.version, curr) > - self._writeentry(transaction, ifh, dfh, entry, data, link, offset) > + self._writeentry(transaction, ifh, dfh, entry, deltainfo.data, > + link, offset) > + > + rawtext = btext[0] > This line gave me trouble during review. But that's only because the existing code is hard to follow (we pass an array around then possibly replace its first element so we can later use it in a cache). I would happily review a follow-up patch that explicitly returns the rawtext from a called function instead of mutating an array as a side-effect. > > if alwayscache and rawtext is None: > rawtext = deltacomputer.buildtext(revinfo, fh) > > if type(rawtext) == bytes: # only accept immutable objects > self._cache = (node, curr, rawtext) > - self._chainbasecache[curr] = chainbase > + self._chainbasecache[curr] = deltainfo.chainbase > return node > > def _writeentry(self, transaction, ifh, dfh, entry, data, link, > offset): > diff --git a/mercurial/revlogutils/deltas.py > b/mercurial/revlogutils/deltas.py > --- a/mercurial/revlogutils/deltas.py > +++ b/mercurial/revlogutils/deltas.py > @@ -691,6 +691,19 @@ class deltacomputer(object): > chainbase, chainlen, compresseddeltalen, > snapshotdepth) > > + def _fullsnapshotinfo(self, fh, revinfo): > + curr = len(self.revlog) > + rawtext = self.buildtext(revinfo, fh) > + data = self.revlog.compress(rawtext) > + compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0]) > + deltabase = chainbase = curr > + snapshotdepth = 0 > + chainlen = 1 > + > + return _deltainfo(dist, deltalen, data, deltabase, > + chainbase, chainlen, compresseddeltalen, > + snapshotdepth) > + > def finddeltainfo(self, revinfo, fh): > """Find an acceptable delta against a candidate revision > > @@ -700,15 +713,18 @@ class deltacomputer(object): > > Returns the first acceptable candidate revision, as ordered by > _getcandidaterevs > + > + If no suitable deltabase is found, we return delta info for a full > + snapshot. > """ > if not revinfo.textlen: > - return None # empty file do not need delta > + return self._fullsnapshotinfo(fh, revinfo) > > # no delta for flag processor revision (see "candelta" for why) > # not calling candelta since only one revision needs test, also to > # avoid overhead fetching flags again. > if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS: > - return None > + return self._fullsnapshotinfo(fh, revinfo) > > cachedelta = revinfo.cachedelta > p1 = revinfo.p1 > @@ -743,4 +759,6 @@ class deltacomputer(object): > deltainfo = min(nominateddeltas, key=lambda x: x.deltalen) > break > > + if deltainfo is None: > + deltainfo = self._fullsnapshotinfo(fh, revinfo) > return deltainfo >
Patch
diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -1967,31 +1967,23 @@ class revlog(object): deltainfo = deltacomputer.finddeltainfo(revinfo, fh) - if deltainfo is not None: - base = deltainfo.base - chainbase = deltainfo.chainbase - data = deltainfo.data - l = deltainfo.deltalen - else: - rawtext = deltacomputer.buildtext(revinfo, fh) - data = self.compress(rawtext) - l = len(data[1]) + len(data[0]) - base = chainbase = curr - - e = (offset_type(offset, flags), l, textlen, - base, link, p1r, p2r, node) + e = (offset_type(offset, flags), deltainfo.deltalen, textlen, + deltainfo.base, link, p1r, p2r, node) self.index.append(e) self.nodemap[node] = curr entry = self._io.packentry(e, self.node, self.version, curr) - self._writeentry(transaction, ifh, dfh, entry, data, link, offset) + self._writeentry(transaction, ifh, dfh, entry, deltainfo.data, + link, offset) + + rawtext = btext[0] if alwayscache and rawtext is None: rawtext = deltacomputer.buildtext(revinfo, fh) if type(rawtext) == bytes: # only accept immutable objects self._cache = (node, curr, rawtext) - self._chainbasecache[curr] = chainbase + self._chainbasecache[curr] = deltainfo.chainbase return node def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset): diff --git a/mercurial/revlogutils/deltas.py b/mercurial/revlogutils/deltas.py --- a/mercurial/revlogutils/deltas.py +++ b/mercurial/revlogutils/deltas.py @@ -691,6 +691,19 @@ class deltacomputer(object): chainbase, chainlen, compresseddeltalen, snapshotdepth) + def _fullsnapshotinfo(self, fh, revinfo): + curr = len(self.revlog) + rawtext = self.buildtext(revinfo, fh) + data = self.revlog.compress(rawtext) + compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0]) + deltabase = chainbase = curr + snapshotdepth = 0 + chainlen = 1 + + return _deltainfo(dist, deltalen, data, deltabase, + chainbase, chainlen, compresseddeltalen, + snapshotdepth) + def finddeltainfo(self, revinfo, fh): """Find an acceptable delta against a candidate revision @@ -700,15 +713,18 @@ class deltacomputer(object): Returns the first acceptable candidate revision, as ordered by _getcandidaterevs + + If no suitable deltabase is found, we return delta info for a full + snapshot. """ if not revinfo.textlen: - return None # empty file do not need delta + return self._fullsnapshotinfo(fh, revinfo) # no delta for flag processor revision (see "candelta" for why) # not calling candelta since only one revision needs test, also to # avoid overhead fetching flags again. if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS: - return None + return self._fullsnapshotinfo(fh, revinfo) cachedelta = revinfo.cachedelta p1 = revinfo.p1 @@ -743,4 +759,6 @@ class deltacomputer(object): deltainfo = min(nominateddeltas, key=lambda x: x.deltalen) break + if deltainfo is None: + deltainfo = self._fullsnapshotinfo(fh, revinfo) return deltainfo