Patchwork [7,of,7] revlog: split `rawtext` retrieval out of _revisiondata

login
register
mail settings
Submitter Pierre-Yves David
Date Aug. 20, 2019, 4:37 p.m.
Message ID <9f906301d3f152b00935.1566319052@nodosa.octopoid.net>
Download mbox | patch
Permalink /patch/41373/
State Accepted
Headers show

Comments

Pierre-Yves David - Aug. 20, 2019, 4:37 p.m.
# HG changeset patch
# User Pierre-Yves David <pierre-yves.david@octobus.net>
# Date 1566313956 -7200
#      Tue Aug 20 17:12:36 2019 +0200
# Node ID 9f906301d3f152b00935a519fafc216499ead4df
# Parent  5b8bcfd5ef2cb077fe3c8bd2c4f62f693ebcf2fc
# EXP-Topic revisiondata
# Available At https://bitbucket.org/octobus/mercurial-devel/
#              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 9f906301d3f1
revlog: split `rawtext` retrieval out of _revisiondata

This part is reasonably independent. Having it on its own clarifies the code flow
and will help code that inherits from revlog to override specific areas only.
Yuya Nishihara - Aug. 21, 2019, 12:54 p.m.
On Tue, 20 Aug 2019 18:37:32 +0200, Pierre-Yves David wrote:
> # HG changeset patch
> # User Pierre-Yves David <pierre-yves.david@octobus.net>
> # Date 1566313956 -7200
> #      Tue Aug 20 17:12:36 2019 +0200
> # Node ID 9f906301d3f152b00935a519fafc216499ead4df
> # Parent  5b8bcfd5ef2cb077fe3c8bd2c4f62f693ebcf2fc
> # EXP-Topic revisiondata
> # Available At https://bitbucket.org/octobus/mercurial-devel/
> #              hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 9f906301d3f1
> revlog: split `rawtext` retrieval out of _revisiondata

Queued the series, thanks.

> +        if raw and validated:
> +            # if we are don't want to process the raw text and that raw
> +            # text is cached, we can exit early.
> +            return rawtext
> +        if rev is None:
> +            rev = self.rev(node)
> +        # the revlog's flag for this revision
> +        # (usually alter its state or content)
> +        flags = self.flags(rev)
> +
> +        if validated and flags == REVIDX_DEFAULT_FLAGS:
> +            # no extra flags set, no flag processor runs, text = rawtext
> +            return rawtext
> +
> +        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
> +        if validatehash:
> +            self.checkhash(text, node, rev=rev)
> +        if not validated:
> +            self._revisioncache = (node, rev, rawtext)

Maybe better to add a comment explaining why the cache is updated only if
"not validated", as it depends on the internal behavior of _rawtext().

> +    def _rawtext(self, node, rev, _df=None):
> +        """return the possibly unvalidated rawtext for a revision

Nit: unvalidated sounds odd, but I don't know the right word.

> +        returns is (rev, rawtext, validated)

s/is//

Patch

diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -1623,80 +1623,77 @@  class revlog(object):
         if node == nullid:
             return ""
 
-        # revision in the cache (could be useful to apply delta)
-        cachedrev = None
-        # the revlog's flag for this revision
-        # (usually alter its state or content)
-        flags = None
         # The text as stored inside the revlog. Might be the revision or might
         # need to be processed to retrieve the revision.
         rawtext = None
+
+        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
+
+        if raw and validated:
+            # if we are don't want to process the raw text and that raw
+            # text is cached, we can exit early.
+            return rawtext
+        if rev is None:
+            rev = self.rev(node)
+        # the revlog's flag for this revision
+        # (usually alter its state or content)
+        flags = self.flags(rev)
+
+        if validated and flags == REVIDX_DEFAULT_FLAGS:
+            # no extra flags set, no flag processor runs, text = rawtext
+            return rawtext
+
+        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
+        if validatehash:
+            self.checkhash(text, node, rev=rev)
+        if not validated:
+            self._revisioncache = (node, rev, rawtext)
+
+        return text
+
+    def _rawtext(self, node, rev, _df=None):
+        """return the possibly unvalidated rawtext for a revision
+
+        returns is (rev, rawtext, validated)
+        """
+
+        # revision in the cache (could be useful to apply delta)
+        cachedrev = None
         # An intermediate text to apply deltas to
         basetext = None
-        # Do we need to update the rawtext cache once it is validated ?
-        needcaching = True
 
         # Check is we are the entry in cache
         # The cache entry looks like (node, rev, rawtext)
         if self._revisioncache:
             if self._revisioncache[0] == node:
-                needcaching = False
-                # _cache only stores rawtext
-                # rawtext is reusable. but we might need to run flag processors
-                rawtext = self._revisioncache[2]
-                if raw:
-                    # if we are don't want to process the raw text and that raw
-                    # text is cached, we can exit early.
-                    return rawtext
-                # duplicated, but good for perf
-                if rev is None:
-                    rev = self.rev(node)
-                if flags is None:
-                    flags = self.flags(rev)
-                # no extra flags set, no flag processor runs, text = rawtext
-                if flags == REVIDX_DEFAULT_FLAGS:
-                    return rawtext
-
+                return (rev, self._revisioncache[2], True)
             cachedrev = self._revisioncache[1]
 
-        # look up what we need to read
-        if rawtext is None:
-            if rev is None:
-                rev = self.rev(node)
-
-            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
-            if stopped:
-                basetext = self._revisioncache[2]
-
-            # drop cache to save memory
-            self._revisioncache = None
-
-            targetsize = None
-            rawsize = self.index[rev][2]
-            if 0 <= rawsize:
-                targetsize = 4 * rawsize
-
-            bins = self._chunks(chain, df=_df, targetsize=targetsize)
-            if basetext is None:
-                basetext = bytes(bins[0])
-                bins = bins[1:]
-
-            rawtext = mdiff.patches(basetext, bins)
-            del basetext # let us have a change to free memory early
-
-        if flags is None:
-            if rev is None:
-                rev = self.rev(node)
-            flags = self.flags(rev)
-
-        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
-        if validatehash:
-            self.checkhash(text, node, rev=rev)
-
-        if needcaching:
-            self._revisioncache = (node, rev, rawtext)
-
-        return text
+        if rev is None:
+            rev = self.rev(node)
+
+        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
+        if stopped:
+            basetext = self._revisioncache[2]
+
+        # drop cache to save memory, the caller is expected to
+        # update self._revisioncache after validating the text
+        self._revisioncache = None
+
+        targetsize = None
+        rawsize = self.index[rev][2]
+        if 0 <= rawsize:
+            targetsize = 4 * rawsize
+
+        bins = self._chunks(chain, df=_df, targetsize=targetsize)
+        if basetext is None:
+            basetext = bytes(bins[0])
+            bins = bins[1:]
+
+        rawtext = mdiff.patches(basetext, bins)
+        del basetext # let us have a change to free memory early
+        return (rev, rawtext, False)
+
 
     def rawdata(self, nodeorrev, _df=None):
         """return an uncompressed raw data of a given node or revision number.