Patchwork [6,of,6,censor,RFC] revlog: addgroup checks if incoming deltas add censored revs, sets flag bit

login
register
mail settings
Submitter adgar@google.com
Date Jan. 23, 2015, 11:53 p.m.
Message ID <9bc824ed68e8f9b726d5.1422057206@adgar.nyc.corp.google.com>
Download mbox | patch
Permalink /patch/7548/
State Superseded
Commit 4bfe9f2d9761160122968c2a96824d964fb6e3e9
Headers show

Comments

adgar@google.com - Jan. 23, 2015, 11:53 p.m.
# HG changeset patch
# User Mike Edgar <adgar@google.com>
# Date 1421266568 18000
#      Wed Jan 14 15:16:08 2015 -0500
# Node ID 9bc824ed68e8f9b726d5d227c263b62bdebda6cb
# Parent  a047ca8ca035158ecd9d034b9b4232be8b8080d4
revlog: addgroup checks if incoming deltas add censored revs, sets flag bit

A censored revision stored in a revlog should have the censored revlog index
flag bit set. This implies we must know if a revision is censored before we
add it to the revlog.

Censor metadata is stored in the revision text. When adding a changegroup to
a revlog, we have a sequence of deltas, not a sequence of full revision texts.
This means we won't always have easy access to the censor metadata, so we
don't have an easy way to know when to set the relevant index flag bit.

This change introduces a heuristic based on assumptions about the Mercurial
delta format and filelog metadata. Since filelog metadata is at the start of
the revision text, we can be sure that when a delta produces a censored
revision, the first patch in that delta will introduce the censor metadata
line. There are only two possible forms of such a patch: one inserted at
offset 0 that also adds the "\1\n" line delimiting the start of filelog
metadata, and one inserted at offset 2, directly after a "\1\n" delimiter
already present in the base.

See more at http://mercurial.selenic.com/wiki/CensorPlan

Patch

diff -r a047ca8ca035 -r 9bc824ed68e8 mercurial/filelog.py
--- a/mercurial/filelog.py	Wed Jan 21 22:09:32 2015 -0500
+++ b/mercurial/filelog.py	Wed Jan 14 15:16:08 2015 -0500
@@ -6,7 +6,7 @@ 
 # GNU General Public License version 2 or any later version.
 
 import error, revlog
-import re
+import re, struct
 
 _mdre = re.compile('\1\n')
 def parsemeta(text):
@@ -110,3 +110,40 @@ 
 
     def _file(self, f):
         return filelog(self.opener, f)
+
+    def _peek_iscensored(self, baserev, delta, flush):
+        """Quickly check if a delta produces a censored revision."""
+        # Fragile heuristic: unless new file meta keys are added alphabetically
+        # preceding "censored", all censored revisions are prefixed by
+        # "\1\ncensored:". A delta producing such a prefix either:
+        #
+        # 1. Has no \1\n prefix in its base, and must start with at least three
+        #    new lines to add "\1\ncensored:<data>\n\1\n" (more if the censored
+        #    revision was a copy).
+        # 2. Has a censored base and must replace the whole revision, adding
+        #    at least three lines as in case 1.
+        # 3. Has a \1\n prefix in its base, perhaps because it was copied but
+        #    not necessarily. The "censored:<data>\n" line will be added at
+        #    position 2.
+        hlen = struct.calcsize(">lll")
+        if len(delta) <= hlen:
+            return False
+        start, skip, copy = struct.unpack(">lll", delta[:hlen])
+        # Cases 1, 2: first patch must insert metadata at position 0.
+        add = "\1\ncensored:"
+        addlen = len(add)
+        if (start == 0 and copy >= addlen and len(delta) >= hlen + addlen
+            and delta[hlen:hlen + addlen] == add):
+            return True
+        # Case 3: first patch starts after "\1\n" of base, position 2.
+        ins = "censored:"
+        inslen = len(ins)
+        if (start == 2 and copy >= inslen and len(delta) >= hlen + inslen
+            and delta[hlen:hlen + inslen] == ins):
+            # Likely censored, but we need to verify the base really does start
+            # with "\1\n". False positives should be rare enough to justify
+            # flushing file handles and decoding the base.
+            flush()
+            return self.revision(baserev)[0:2] == "\1\n"
+
+        return False
diff -r a047ca8ca035 -r 9bc824ed68e8 mercurial/revlog.py
--- a/mercurial/revlog.py	Wed Jan 21 22:09:32 2015 -0500
+++ b/mercurial/revlog.py	Wed Jan 14 15:16:08 2015 -0500
@@ -1374,7 +1374,10 @@ 
             transaction.add(self.indexfile, isize, r)
             transaction.add(self.datafile, end)
             dfh = self.opener(self.datafile, "a")
-
+        def flush():
+            if dfh:
+                dfh.flush()
+            ifh.flush()
         try:
             # loop through our set of deltas
             chain = None
@@ -1418,9 +1421,13 @@ 
                         raise error.CensoredBaseError(self.indexfile,
                                                       self.node(baserev))
 
+                flags = REVIDX_DEFAULT_FLAGS
+                if self._peek_iscensored(baserev, delta, flush):
+                    flags |= REVIDX_ISCENSORED
+
                 chain = self._addrevision(node, None, transaction, link,
-                                          p1, p2, REVIDX_DEFAULT_FLAGS,
-                                          (baserev, delta), ifh, dfh)
+                                          p1, p2, flags, (baserev, delta),
+                                          ifh, dfh)
                 if not dfh and not self._inline:
                     # addrevision switched from inline to conventional
                     # reopen the index
@@ -1438,6 +1445,10 @@ 
         """Check if a file revision is censored."""
         return False
 
+    def _peek_iscensored(self, baserev, delta, flush):
+        """Quickly check if a delta produces a censored revision."""
+        return False
+
     def getstrippoint(self, minlink):
         """find the minimum rev that must be stripped to strip the linkrev