Patchwork [4,of,7,flagprocessor,v7] revlog: flag processor

login
register
mail settings
Submitter Remi Chaintron
Date Jan. 2, 2017, 5:51 p.m.
Message ID <c59965f26abba42afccd.1483379474@remi-mbp2>
Download mbox | patch
Permalink /patch/18071/
State Superseded
Delegated to: Pierre-Yves David
Headers show

Comments

Remi Chaintron - Jan. 2, 2017, 5:51 p.m.
# HG changeset patch
# User Remi Chaintron <remi@fb.com>
# Date 1483375129 18000
#      Mon Jan 02 11:38:49 2017 -0500
# Node ID c59965f26abba42afccd05881c5f3a73d7f2e1d1
# Parent  2212e5fd10eef8d74e4debc1fd0603fed21e6e57
revlog: flag processor

Add the ability for revlog objects to process revision flags and apply
registered transforms on read/write operations.

This patch introduces:
- the 'revlog._processflags()' method that looks at revision flags and applies
  transforms registered on them. Due to the need to handle non-commutative
  operations, flag transforms are applied in stable order but the order in which
  the transforms are applied is reversed between read and write operations.
- the 'addflagtransform()' function, which allows transforms to be registered
  on flags. Transforms are defined as a 3-tuple of (read, write, raw) functions
  to be applied depending on the operation.

Patch

diff --git a/mercurial/bundlerepo.py b/mercurial/bundlerepo.py
--- a/mercurial/bundlerepo.py
+++ b/mercurial/bundlerepo.py
@@ -148,7 +148,10 @@ 
             delta = self._chunk(chain.pop())
             text = mdiff.patches(text, [delta])
 
-        self.checkhash(text, node, rev=rev)
+        text, validatehash = self._processflags(text, self.flags(rev),
+                                                'read', raw=raw)
+        if validatehash:
+            self.checkhash(text, node, rev=rev)
         self._cache = (node, rev, text)
         return text
 
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -56,6 +56,10 @@ 
 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
 REVIDX_DEFAULT_FLAGS = 0
 REVIDX_KNOWN_FLAGS = REVIDX_ISCENSORED
+# stable order in which flags need to be processed and their transforms applied
+REVIDX_FLAGS_ORDER = [
+    REVIDX_ISCENSORED,
+]
 
 # max size of revlog with inline data
 _maxinline = 131072
@@ -64,6 +68,36 @@ 
 RevlogError = error.RevlogError
 LookupError = error.LookupError
 CensoredNodeError = error.CensoredNodeError
+ProgrammingError = error.ProgrammingError
+
+# Store flag transforms (cf. 'addflagtransform()' to register)
+_flagtransforms = { }
+
+def addflagtransform(flag, transform):
+    """Register a transform for a revision data flag.
+
+    ``flag`` must be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER
+    (ProgrammingError otherwise) and can only be registered once (Abort).
+    ``transform`` must be a 3-tuple (read, write, raw) of functions following
+    this signature: ``f(self, text) -> newtext, bool`` where bool indicates
+    whether newtext can be used to check hash integrity.
+    """
+    # Interpolate after _() so the constant template is what gets translated.
+    if not flag & REVIDX_KNOWN_FLAGS:
+        raise ProgrammingError(_(
+            "cannot register transform on unknown flag '%x'.") % (flag))
+    if flag not in REVIDX_FLAGS_ORDER:
+        raise ProgrammingError(_(
+            "flag '%x' undefined in REVIDX_FLAGS_ORDER.") % (flag))
+    if flag in _flagtransforms:
+        raise error.Abort(_(
+            "cannot register multiple transforms on flag '%x'.") % (flag))
+    try:
+        read, write, raw = transform
+    except (TypeError, ValueError):
+        raise ProgrammingError(_(
+            'transform must be a 3-tuple (read, write, raw).'))
+    _flagtransforms[flag] = transform
 
 def getoffset(q):
     return int(q >> 16)
@@ -1231,11 +1265,6 @@ 
         if rev is None:
             rev = self.rev(node)
 
-        # check rev flags
-        if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
-            raise RevlogError(_('incompatible revision flag %x') %
-                              (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
-
         chain, stopped = self._deltachain(rev, stoprev=cachedrev)
         if stopped:
             text = self._cache[2]
@@ -1249,7 +1278,12 @@ 
             bins = bins[1:]
 
         text = mdiff.patches(text, bins)
-        self.checkhash(text, node, rev=rev)
+
+        text, validatehash = self._processflags(text, self.flags(rev), 'read',
+                                               raw=raw)
+        if validatehash:
+            self.checkhash(text, node, rev=rev)
+
         self._cache = (node, rev, text)
         return text
 
@@ -1261,6 +1295,57 @@ 
         """
         return hash(text, p1, p2)
 
+    def _processflags(self, text, flags, operation, raw=False):
+        """Process revision flags and apply registered transforms.
+
+        ``text`` - the revision data to process
+        ``flags`` - the revision flags
+        ``operation`` - the operation being performed ('read' or 'write')
+        ``raw`` - an optional argument describing if the raw transform should be
+        applied.
+
+        This method processes the flags in the order (or reverse order if
+        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
+        transforms registered for present flags. The order of flags defined in
+        REVIDX_FLAGS_ORDER needs to be stable for non-commutative
+        operations. Flags without a registered transform are skipped.
+
+        Note: If the ``raw`` argument is set, it has precedence over the
+        operation. It is usually set in the context of changegroup generation or
+        debug commands as these typically are interested in the raw contents.
+
+        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
+        processed text and ``validatehash`` is a bool indicating whether the
+        returned text should be checked for hash integrity.
+        """
+        if operation not in ('read', 'write'):
+            raise ProgrammingError(_("invalid operation '%s'") % (operation))
+        # Check all flags are known.
+        if flags & ~REVIDX_KNOWN_FLAGS:
+            raise RevlogError(_("incompatible revision flag '%x'") %
+                              (flags & ~REVIDX_KNOWN_FLAGS))
+        validatehash = True
+        # Depending on the operation (read or write), the order might be
+        # reversed due to non-commutative transforms.
+        orderedflags = REVIDX_FLAGS_ORDER
+        if operation == 'write':
+            orderedflags = reversed(orderedflags)
+
+        for flag in orderedflags:
+            # A flag may be set without any transform registered for it; skip
+            # it instead of indexing None (which would raise TypeError).
+            transform = _flagtransforms.get(flag)
+            if not (flag & flags) or transform is None:
+                continue
+            # transform is a (read, write, raw) 3-tuple; ``raw`` takes
+            # precedence over ``operation`` when selecting the function.
+            index = 2 if raw else (1 if operation == 'write' else 0)
+            func = transform[index]
+            if func is not None:
+                text, vhash = func(self, text)
+                validatehash = validatehash and vhash
+        return text, validatehash
+
     def checkhash(self, text, node, p1=None, p2=None, rev=None):
         """Check node hash integrity.
 
@@ -1358,6 +1443,9 @@ 
         if not self._inline:
             dfh = self.opener(self.datafile, "a+")
         ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
+
+        text, _ = self._processflags(text, flags, 'write')
+
         try:
             return self._addrevision(node, text, transaction, link, p1, p2,
                                      flags, cachedelta, ifh, dfh)
@@ -1423,7 +1511,7 @@ 
         - text is optional (can be None); if not set, cachedelta must be set.
           if both are set, they must correspond to each other.
         - raw is optional; if set to True, it indicates the revision data is to
-          be treated by processflags() as raw. It is usually set by changegroup
+          be treated by _processflags() as raw. It is usually set by changegroup
           generation and debug commands.
         """
         btext = [text]
@@ -1448,7 +1536,13 @@ 
                 btext[0] = mdiff.patch(basetext, delta)
 
             try:
-                self.checkhash(btext[0], node, p1=p1, p2=p2)
+                validatehash = True
+                if raw:
+                    btext[0], validatehash = self._processflags(btext[0],
+                                                                flags, 'read',
+                                                                raw=raw)
+                if validatehash:
+                    self.checkhash(btext[0], node, p1=p1, p2=p2)
                 if flags & REVIDX_ISCENSORED:
                     raise RevlogError(_('node %s is not censored') % node)
             except CensoredNodeError: