Patchwork D3151: revlog: move censor logic into main revlog class

login
register
mail settings
Submitter phabricator
Date April 6, 2018, 12:51 a.m.
Message ID <differential-rev-PHID-DREV-yuxn5dlwraixlr2xpue4-req@phab.mercurial-scm.org>
Download mbox | patch
Permalink /patch/30415/
State Superseded
Headers show

Comments

phabricator - April 6, 2018, 12:51 a.m.
indygreg created this revision.
Herald added a subscriber: mercurial-devel.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  Previously, the revlog class implemented dummy methods for
  various censor-related functionality. Revision censoring was
  (and will continue to be) only possible on filelog instances.
  So filelog implemented these methods to perform something
  reasonable.
  
  A problem with implementing censoring on filelog is that
  it assumes filelog is a revlog. Upcoming work to formalize
  the filelog interface will make this not true.
  
  Furthermore, the censoring logic is security-sensitive. I
  think action-at-a-distance with custom implementation of core
  revlog APIs in derived classes is a bit dangerous. I think at
  a minimum the censor logic should live in revlog.py.
  
  I was tempted to created a "censored revlog" class that
  basically pulled these methods out of filelog. But, I wasn't
  a huge fan of overriding core methods in child classes. A
  reason to do that would be performance. However, the censoring
  code only comes into play when:
  
  - hash verification fails
  - delta generation
  - applying deltas from changegroups
  
  The new code is conditional on an instance attribute. So the
  overhead for running the censored code when the revlog isn't
  censorable is an attribute lookup. All of these operations are
  at least a magnitude slower than a Python attribute lookup. So
  there shouldn't be a performance concern.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D3151

AFFECTED FILES
  mercurial/filelog.py
  mercurial/revlog.py

CHANGE DETAILS




To: indygreg, #hg-reviewers
Cc: mercurial-devel

Patch

diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -117,6 +117,10 @@ 
     metatext = "".join("%s: %s\n" % (k, meta[k]) for k in keys)
     return "\1\n%s\1\n%s" % (metatext, text)
 
+def _censoredtext(text):
+    m, offs = parsemeta(text)
+    return m and "censored" in m
+
 def addflagprocessor(flag, processor):
     """Register a flag processor on a revision data flag.
 
@@ -574,9 +578,11 @@ 
     If mmaplargeindex is True, and an mmapindexthreshold is set, the
     index will be mmapped rather than read if it is larger than the
     configured threshold.
+
+    If censorable is True, the revlog can have censored revisions.
     """
     def __init__(self, opener, indexfile, datafile=None, checkambig=False,
-                 mmaplargeindex=False):
+                 mmaplargeindex=False, censorable=False):
         """
         create a revlog object
 
@@ -589,6 +595,7 @@ 
         #  When True, indexfile is opened with checkambig=True at writing, to
         #  avoid file stat ambiguity.
         self._checkambig = checkambig
+        self._censorable = censorable
         # 3-tuple of (node, rev, text) for a raw revision.
         self._cache = None
         # Maps rev to chain base rev.
@@ -1867,14 +1874,19 @@ 
         Available as a function so that subclasses can extend hash mismatch
         behaviors as needed.
         """
-        if p1 is None and p2 is None:
-            p1, p2 = self.parents(node)
-        if node != self.hash(text, p1, p2):
-            revornode = rev
-            if revornode is None:
-                revornode = templatefilters.short(hex(node))
-            raise RevlogError(_("integrity check failed on %s:%s")
-                % (self.indexfile, pycompat.bytestr(revornode)))
+        try:
+            if p1 is None and p2 is None:
+                p1, p2 = self.parents(node)
+            if node != self.hash(text, p1, p2):
+                revornode = rev
+                if revornode is None:
+                    revornode = templatefilters.short(hex(node))
+                raise RevlogError(_("integrity check failed on %s:%s")
+                    % (self.indexfile, pycompat.bytestr(revornode)))
+        except RevlogError:
+            if self._censorable and _censoredtext(text):
+                raise error.CensoredNodeError(self.indexfile, node, text)
+            raise
 
     def _enforceinlinesize(self, tr, fp=None):
         """Check if the revlog is too big for inline and convert if so.
@@ -2300,11 +2312,33 @@ 
 
     def iscensored(self, rev):
         """Check if a file revision is censored."""
-        return False
+        if not self._censorable:
+            return False
+
+        return self.flags(rev) & REVIDX_ISCENSORED
 
     def _peek_iscensored(self, baserev, delta, flush):
         """Quickly check if a delta produces a censored revision."""
-        return False
+        if not self._censorable:
+            return False
+
+        # Fragile heuristic: unless new file meta keys are added alphabetically
+        # preceding "censored", all censored revisions are prefixed by
+        # "\1\ncensored:". A delta producing such a censored revision must be a
+        # full-replacement delta, so we inspect the first and only patch in the
+        # delta for this prefix.
+        hlen = struct.calcsize(">lll")
+        if len(delta) <= hlen:
+            return False
+
+        oldlen = self.rawsize(baserev)
+        newlen = len(delta) - hlen
+        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
+            return False
+
+        add = "\1\ncensored:"
+        addlen = len(add)
+        return newlen >= addlen and delta[hlen:hlen + addlen] == add
 
     def getstrippoint(self, minlink):
         """find the minimum rev that must be stripped to strip the linkrev
diff --git a/mercurial/filelog.py b/mercurial/filelog.py
--- a/mercurial/filelog.py
+++ b/mercurial/filelog.py
@@ -7,27 +7,20 @@ 
 
 from __future__ import absolute_import
 
-import struct
-
 from .thirdparty.zope import (
     interface as zi,
 )
 from . import (
-    error,
-    mdiff,
     repository,
     revlog,
 )
 
-def _censoredtext(text):
-    m, offs = revlog.parsemeta(text)
-    return m and "censored" in m
-
 @zi.implementer(repository.ifilestorage)
 class filelog(revlog.revlog):
     def __init__(self, opener, path):
         super(filelog, self).__init__(opener,
-                        "/".join(("data", path + ".i")))
+                        "/".join(("data", path + ".i")),
+                                      censorable=True)
         # full name of the user visible file, relative to the repository root
         self.filename = path
 
@@ -90,35 +83,3 @@ 
             return t2 != text
 
         return True
-
-    def checkhash(self, text, node, p1=None, p2=None, rev=None):
-        try:
-            super(filelog, self).checkhash(text, node, p1=p1, p2=p2, rev=rev)
-        except error.RevlogError:
-            if _censoredtext(text):
-                raise error.CensoredNodeError(self.indexfile, node, text)
-            raise
-
-    def iscensored(self, rev):
-        """Check if a file revision is censored."""
-        return self.flags(rev) & revlog.REVIDX_ISCENSORED
-
-    def _peek_iscensored(self, baserev, delta, flush):
-        """Quickly check if a delta produces a censored revision."""
-        # Fragile heuristic: unless new file meta keys are added alphabetically
-        # preceding "censored", all censored revisions are prefixed by
-        # "\1\ncensored:". A delta producing such a censored revision must be a
-        # full-replacement delta, so we inspect the first and only patch in the
-        # delta for this prefix.
-        hlen = struct.calcsize(">lll")
-        if len(delta) <= hlen:
-            return False
-
-        oldlen = self.rawsize(baserev)
-        newlen = len(delta) - hlen
-        if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
-            return False
-
-        add = "\1\ncensored:"
-        addlen = len(add)
-        return newlen >= addlen and delta[hlen:hlen + addlen] == add