Patchwork [2,of,5,v3] revlog: add flagprocessor

login
register
mail settings
Submitter Remi Chaintron
Date Nov. 23, 2016, 4:19 p.m.
Message ID <f45a31268675da806e7f.1479917954@iphonesleepsort.dhcp.thefacebook.com>
Download mbox | patch
Permalink /patch/17720/
State Superseded
Headers show

Comments

Remi Chaintron - Nov. 23, 2016, 4:19 p.m.
# HG changeset patch
# User Remi Chaintron <remi@fb.com>
# Date 1479916365 0
#      Wed Nov 23 15:52:45 2016 +0000
# Branch stable
# Node ID f45a31268675da806e7f9569b569aec8fefb29d5
# Parent  fe4fefc6aba95f613dba4ebff3abd38205fdd74f
revlog: add flagprocessor

Add a mechanism for extensions/subclasses of revlog to register transforms on
revlog operations, based on the revision flags.
Due to some operations being non-commutative, transforms applied in a specific
order when writing the contents of the revlog need to be applied in the reverse
order when reading.
In order to allow the composition of such operations, the flagprocessor applies
flags in the stable order defined by REVIDX_FLAGS_PROCESSING_ORDER constant.

Due to the fact storing flags on the server side might not be enabled, the
flagprocessor class also handles the case when no flags are set. In such a case,
it defers to enabled extensions to register default transformations and use
heuristics to determine what to do with the observed contents.

Patch

diff --git a/mercurial/bundlerepo.py b/mercurial/bundlerepo.py
--- a/mercurial/bundlerepo.py
+++ b/mercurial/bundlerepo.py
@@ -147,7 +147,9 @@ 
             delta = self._chunk(chain.pop())
             text = mdiff.patches(text, [delta])
 
-        self.checkhash(text, node, rev=rev)
+        text, validatehash = self.processflags(node, text, self.flags(rev))
+        if validatehash is True:
+            self.checkhash(text, node, rev=rev)
         self._cache = (node, rev, text)
         return text
 
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -56,6 +56,10 @@ 
 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
 REVIDX_DEFAULT_FLAGS = 0
 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
+# stable order in which flags need to be processed by the flagprocessor
+REVIDX_FLAGS_PROCESSING_ORDER = [
+    REVIDX_ISCENSORED,
+]
 
 # max size of revlog with inline data
 _maxinline = 131072
@@ -76,6 +80,76 @@ 
 
 _nullhash = hashlib.sha1(nullid)
 
+class flagprocessor(object):
+    """process revlog objects contents based on flags
+
+    flagprocessor objects are the interface between revlog objects and
+    subclasses/extensions needing to change or update the contents of revlog
+    objects, based on flags.
+
+    The role of the flagprocessor is to apply transforms registered by
+    extensions and revlog subclasses in the order defined by
+    REVIDX_FLAGS_PROCESSING_ORDER and the type (read or write) of operation.
+    This allows the flag processor to modify the contents of revlog objects and
+    handle the composition of non-commutative operations.
+    """
+
+    def __init__(self, revlogobject):
+        self.transformmap = {}
+        self.revlogobject = revlogobject
+
+    def register(self, transformmap):
+        """Register transforms to be applied for flags.
+
+        ``transformmap`` - a map of flag to transform
+        """
+        for flag in transformmap:
+            if flag in REVIDX_FLAGS_PROCESSING_ORDER:
+                self.transformmap[flag] = transformmap[flag]
+
+    def unregister(self, transformmap):
+        """Unregister transforms for flags."""
+        for flag in transformmap:
+            if flag in REVIDX_FLAGS_PROCESSING_ORDER:
+                self.transformmap[flag] = None
+
+    def processflags(self, node, text, revisionflags, reverse=False):
+        """Process flags and apply registered transforms.
+
+        ``node`` - the noideid of revision
+        ``text`` - the revision data to process
+        ``revisionflags`` - the flags applied to the revision
+        ``reverse`` - an optional argument describing whether the flags should
+          be processed in order according to the ``flagprocessor`` flag
+          priority. The flags should be processed in order of priority when
+          reading revisions, and in reverse order when writing revisions.
+
+        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
+        processed text and ``validatehash`` is a bool indicating whether the
+        returned text should be checked for hash integrity.
+        """
+        validatehash = True
+
+        # Depending on the operation (read or write), the order might be
+        # reversed due to non-commutative transforms.
+        orderedflags = REVIDX_FLAGS_PROCESSING_ORDER
+        if reverse is True:
+            orderedflags = reversed(orderedflags)
+
+        for flag in orderedflags:
+            # If a transform has been registered for a known flag, apply it and
+            # update the result tuple.
+            # If no flag is set but transforms have been registered, it is
+            # assumed that the repository does not send flags over the wire and
+            # the extensions implement heuristics to determine what to do based
+            # on the contents of the filelog.
+            if flag & revisionflags or revisionflags == REVIDX_DEFAULT_FLAGS:
+                f = self.transformmap.get(flag, None)
+                if f is not None:
+                    text, validatehash = f(self.revlogobject, text)
+
+        return text, validatehash
+
 def hash(text, p1, p2):
     """generate a hash from the given text and its parent hashes
 
@@ -285,6 +359,7 @@ 
         self.version = v
         self._inline = v & REVLOGNGINLINEDATA
         self._generaldelta = v & REVLOGGENERALDELTA
+        self.flagprocessor = flagprocessor(self)
         flags = v & ~0xFFFF
         fmt = v & 0xFFFF
         if fmt == REVLOGV0 and flags:
@@ -1221,7 +1296,11 @@ 
             bins = bins[1:]
 
         text = mdiff.patches(text, bins)
-        self.checkhash(text, node, rev=rev)
+
+        text, validatehash = self.processflags(node, text, self.flags(rev))
+        if validatehash is True:
+            self.checkhash(text, node, rev=rev)
+
         self._cache = (node, rev, text)
         return text
 
@@ -1233,6 +1312,28 @@ 
         """
         return hash(text, p1, p2)
 
+    def processflags(self, node, text, flags, reverse=False):
+        """Process flags and apply registered transforms.
+
+        cf. `flagprocessor.processflags`` method for argument and return values
+        description.
+
+        ``node`` - the noideid of revision
+        ``text`` - the revision data to process
+        ``flags`` - the flags applied to the revision
+        ``reverse`` - an optional argument describing whether the flags should
+          be processed in order according to the ``flagprocessor`` flag
+          priority. The flags should be processed in order of priority when
+          reading revisions, and in reverse order when writing revisions.
+          This allows for non-reflexive operations in extensions.
+
+        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
+        processed text and ``validatehash`` is a bool indicating whether the
+        returned text should be checked for hash integrity.
+        """
+        return self.flagprocessor.processflags(node, text, flags,
+                                               reverse=reverse)
+
     def checkhash(self, text, node, p1=None, p2=None, rev=None):
         """Check node hash integrity.
 
@@ -1415,7 +1516,10 @@ 
                 basetext = self.revision(self.node(baserev), _df=fh)
                 btext[0] = mdiff.patch(basetext, delta)
             try:
-                self.checkhash(btext[0], node, p1=p1, p2=p2)
+                btext[0], validatehash = self.processflags(node, btext[0],
+                                                           flags, reverse=True)
+                if validatehash is True:
+                    self.checkhash(btext[0], node, p1=p1, p2=p2)
                 if flags & REVIDX_ISCENSORED:
                     raise RevlogError(_('node %s is not censored') % node)
             except CensoredNodeError: