Patchwork [censor,RFC] changegroup: emit full-replacement deltas if either revision is censored

login
register
mail settings
Submitter adgar@google.com
Date Feb. 11, 2015, 8:51 p.m.
Message ID <a770441a605cfcbdc757.1423687916@adgar.nyc.corp.google.com>
Download mbox | patch
Permalink /patch/7792/
State Superseded
Commit 903c7e8c97ad97dd579de976d4e62dd7af4965b4
Headers show

Comments

adgar@google.com - Feb. 11, 2015, 8:51 p.m.
# HG changeset patch
# User Mike Edgar <adgar@google.com>
# Date 1421896172 18000
#      Wed Jan 21 22:09:32 2015 -0500
# Node ID a770441a605cfcbdc757a02d3847c1e8880e29e4
# Parent  1b3b6d629d6c1f8544db349707e47293965b94c0
changegroup: emit full-replacement deltas if either revision is censored

To ensure that exchanged deltas in the presence of censored revisions can
always be applied to the recipient repository, the deltas must replace the
entire base text. To make this restriction reasonably enforceable, the delta
must do so with a single patch operation.

For background and broader design of the censorship feature, see:
http://mercurial.selenic.com/wiki/CensorPlan
Augie Fackler - Feb. 13, 2015, 5:43 p.m.
On Wed, Feb 11, 2015 at 03:51:56PM -0500, Mike Edgar wrote:
> # HG changeset patch
> # User Mike Edgar <adgar@google.com>
> # Date 1421896172 18000
> #      Wed Jan 21 22:09:32 2015 -0500
> # Node ID a770441a605cfcbdc757a02d3847c1e8880e29e4
> # Parent  1b3b6d629d6c1f8544db349707e47293965b94c0
> changegroup: emit full-replacement deltas if either revision is censored
>
> To ensure that exchanged deltas in the presence of censored revisions can
> always be applied to the recipient repository, the deltas must replace the
> entire base text. To make this restriction reasonably enforceable, the delta
> must do so with a single patch operation.
>
> For background and broader design of the censorship feature, see:
> http://mercurial.selenic.com/wiki/CensorPlan
>
> diff -r 1b3b6d629d6c -r a770441a605c mercurial/changegroup.py
> --- a/mercurial/changegroup.py	Wed Jan 21 17:11:37 2015 -0500
> +++ b/mercurial/changegroup.py	Wed Jan 21 22:09:32 2015 -0500
> @@ -481,7 +481,17 @@
>          base = self.deltaparent(revlog, rev, p1, p2, prev)
>
>          prefix = ''
> -        if base == nullrev:
> +        if revlog.iscensored(base) or revlog.iscensored(rev):
> +            try:
> +                delta = revlog.revision(rev)
> +            except error.CensoredNodeError, e:
> +                delta = e.metadata

So the metadata of a CensoredNodeError is actually the delta? Could we
name it delta instead?

> +            if base == nullrev:
> +                prefix = mdiff.trivialdiffheader(len(delta))
> +            else:
> +                baselen = revlog.rawsize(base)
> +                prefix = mdiff.replacediffheader(baselen, len(delta))
> +        elif base == nullrev:
>              delta = revlog.revision(node)
>              prefix = mdiff.trivialdiffheader(len(delta))
>          else:
> diff -r 1b3b6d629d6c -r a770441a605c mercurial/error.py
> --- a/mercurial/error.py	Wed Jan 21 17:11:37 2015 -0500
> +++ b/mercurial/error.py	Wed Jan 21 22:09:32 2015 -0500
> @@ -138,9 +138,10 @@
>  class CensoredNodeError(RevlogError):
>      """error raised when content verification fails on a censored node"""

This docstring might be polite and document that the CensoredNodeError
includes the delta that we refused to apply because it's for a
censored node. (Don't just use my wording though, unless that actually
was clear.)

>
> -    def __init__(self, filename, node):
> +    def __init__(self, filename, node, metadata):
>          from node import short
>          RevlogError.__init__(self, '%s:%s' % (filename, short(node)))
> +        self.metadata = metadata
>
>  class CensoredBaseError(RevlogError):
>      """error raised when a delta is rejected because its base is censored
> diff -r 1b3b6d629d6c -r a770441a605c mercurial/filelog.py
> --- a/mercurial/filelog.py	Wed Jan 21 17:11:37 2015 -0500
> +++ b/mercurial/filelog.py	Wed Jan 21 22:09:32 2015 -0500
> @@ -101,7 +101,7 @@
>              super(filelog, self).checkhash(text, p1, p2, node, rev=rev)
>          except error.RevlogError:
>              if _censoredtext(text):
> -                raise error.CensoredNodeError(self.indexfile, node)
> +                raise error.CensoredNodeError(self.indexfile, node, text)
>              raise
>
>      def iscensored(self, rev):
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel
adgar@google.com - Feb. 18, 2015, 7:05 p.m.
On Fri, Feb 13, 2015 at 12:43 PM, Augie Fackler <raf@durin42.com> wrote:

> On Wed, Feb 11, 2015 at 03:51:56PM -0500, Mike Edgar wrote:
> > # HG changeset patch
> > # User Mike Edgar <adgar@google.com>
> > # Date 1421896172 18000
> > #      Wed Jan 21 22:09:32 2015 -0500
> > # Node ID a770441a605cfcbdc757a02d3847c1e8880e29e4
> > # Parent  1b3b6d629d6c1f8544db349707e47293965b94c0
> > changegroup: emit full-replacement deltas if either revision is censored
> >
> > To ensure that exchanged deltas in the presence of censored revisions can
> > always be applied to the recipient repository, the deltas must replace
> the
> > entire base text. To make this restriction reasonably enforceable, the
> delta
> > must do so with a single patch operation.
> >
> > For background and broader design of the censorship feature, see:
> > http://mercurial.selenic.com/wiki/CensorPlan
> >
> > diff -r 1b3b6d629d6c -r a770441a605c mercurial/changegroup.py
> > --- a/mercurial/changegroup.py        Wed Jan 21 17:11:37 2015 -0500
> > +++ b/mercurial/changegroup.py        Wed Jan 21 22:09:32 2015 -0500
> > @@ -481,7 +481,17 @@
> >          base = self.deltaparent(revlog, rev, p1, p2, prev)
> >
> >          prefix = ''
> > -        if base == nullrev:
> > +        if revlog.iscensored(base) or revlog.iscensored(rev):
> > +            try:
> > +                delta = revlog.revision(rev)
> > +            except error.CensoredNodeError, e:
> > +                delta = e.metadata
>
> So the metadata of a CensoredNodeError is actually the delta? Could we
> name it delta instead?


It's actually the tombstone data as fulltext. I'll rename the "metadata"
attribute to be "tombstone" in v2.

As for "delta" - that local variable name is currently a bit of a misnomer.
In some cases, the entire changegroup delta ends up in the "delta"
variable, but in others, it's split across "prefix" and "delta" which are
yielded separately. I don't believe the separate yielding at this layer
would make a performance difference ("prefix" is either 0 or 12 bytes long)
and I'm sure it isn't a correctness detail.

I can prepare a separate change which refactors this, eliminating the
"prefix" variable and ensuring that "delta" actually always contains a
valid delta. That might make the new censorship changes more obviously
correct. What do you think?


>
> > +            if base == nullrev:
> > +                prefix = mdiff.trivialdiffheader(len(delta))
> > +            else:
> > +                baselen = revlog.rawsize(base)
> > +                prefix = mdiff.replacediffheader(baselen, len(delta))
> > +        elif base == nullrev:
> >              delta = revlog.revision(node)
> >              prefix = mdiff.trivialdiffheader(len(delta))
> >          else:
> > diff -r 1b3b6d629d6c -r a770441a605c mercurial/error.py
> > --- a/mercurial/error.py      Wed Jan 21 17:11:37 2015 -0500
> > +++ b/mercurial/error.py      Wed Jan 21 22:09:32 2015 -0500
> > @@ -138,9 +138,10 @@
> >  class CensoredNodeError(RevlogError):
> >      """error raised when content verification fails on a censored
> node"""
>
> This docstring might be polite and document that the CensoredNodeError
> includes the delta that we refused to apply because it's for a
> censored node. (Don't just use my wording though, unless that actually
> was clear.)


Great point, thanks - will include this in v2.


>
> >
> > -    def __init__(self, filename, node):
> > +    def __init__(self, filename, node, metadata):
> >          from node import short
> >          RevlogError.__init__(self, '%s:%s' % (filename, short(node)))
> > +        self.metadata = metadata
> >
> >  class CensoredBaseError(RevlogError):
> >      """error raised when a delta is rejected because its base is
> censored
> > diff -r 1b3b6d629d6c -r a770441a605c mercurial/filelog.py
> > --- a/mercurial/filelog.py    Wed Jan 21 17:11:37 2015 -0500
> > +++ b/mercurial/filelog.py    Wed Jan 21 22:09:32 2015 -0500
> > @@ -101,7 +101,7 @@
> >              super(filelog, self).checkhash(text, p1, p2, node, rev=rev)
> >          except error.RevlogError:
> >              if _censoredtext(text):
> > -                raise error.CensoredNodeError(self.indexfile, node)
> > +                raise error.CensoredNodeError(self.indexfile, node,
> text)
> >              raise
> >
> >      def iscensored(self, rev):
> > _______________________________________________
> > Mercurial-devel mailing list
> > Mercurial-devel@selenic.com
> > http://selenic.com/mailman/listinfo/mercurial-devel
>
Augie Fackler - Feb. 20, 2015, 2:55 p.m.
On Feb 18, 2015 2:06 PM, "Michael Edgar" <adgar@google.com> wrote:
>
>
>
> On Fri, Feb 13, 2015 at 12:43 PM, Augie Fackler <raf@durin42.com> wrote:
>>
>> On Wed, Feb 11, 2015 at 03:51:56PM -0500, Mike Edgar wrote:
>> > # HG changeset patch
>> > # User Mike Edgar <adgar@google.com>
>> > # Date 1421896172 18000
>> > #      Wed Jan 21 22:09:32 2015 -0500
>> > # Node ID a770441a605cfcbdc757a02d3847c1e8880e29e4
>> > # Parent  1b3b6d629d6c1f8544db349707e47293965b94c0
>> > changegroup: emit full-replacement deltas if either revision is
censored
>> >
>> > To ensure that exchanged deltas in the presence of censored revisions
can
>> > always be applied to the recipient repository, the deltas must replace
the
>> > entire base text. To make this restriction reasonably enforceable, the
delta
>> > must do so with a single patch operation.
>> >
>> > For background and broader design of the censorship feature, see:
>> > http://mercurial.selenic.com/wiki/CensorPlan
>> >
>> > diff -r 1b3b6d629d6c -r a770441a605c mercurial/changegroup.py
>> > --- a/mercurial/changegroup.py        Wed Jan 21 17:11:37 2015 -0500
>> > +++ b/mercurial/changegroup.py        Wed Jan 21 22:09:32 2015 -0500
>> > @@ -481,7 +481,17 @@
>> >          base = self.deltaparent(revlog, rev, p1, p2, prev)
>> >
>> >          prefix = ''
>> > -        if base == nullrev:
>> > +        if revlog.iscensored(base) or revlog.iscensored(rev):
>> > +            try:
>> > +                delta = revlog.revision(rev)
>> > +            except error.CensoredNodeError, e:
>> > +                delta = e.metadata
>>
>> So the metadata of a CensoredNodeError is actually the delta? Could we
>> name it delta instead?
>
>
> It's actually the tombstone data as fulltext. I'll rename the "metadata"
attribute to be "tombstone" in v2.
>
> As for "delta" - that local variable name is currently a bit of a
misnomer. In some cases, the entire changegroup delta ends up in the
"delta" variable, but in others, it's split across "prefix" and "delta"
which are yielded separately. I don't believe the separate yielding at this
layer would make a performance difference ("prefix" is either 0 or 12 bytes
long) and I'm sure it isn't a correctness detail.
>
> I can prepare a separate change which refactors this, eliminating the
"prefix" variable and ensuring that "delta" actually always contains a
valid delta. That might make the new censorship changes more obviously
correct. What do you think?

That sounds like a good improvement for clarity.

>
>>
>>
>> > +            if base == nullrev:
>> > +                prefix = mdiff.trivialdiffheader(len(delta))
>> > +            else:
>> > +                baselen = revlog.rawsize(base)
>> > +                prefix = mdiff.replacediffheader(baselen, len(delta))
>> > +        elif base == nullrev:
>> >              delta = revlog.revision(node)
>> >              prefix = mdiff.trivialdiffheader(len(delta))
>> >          else:
>> > diff -r 1b3b6d629d6c -r a770441a605c mercurial/error.py
>> > --- a/mercurial/error.py      Wed Jan 21 17:11:37 2015 -0500
>> > +++ b/mercurial/error.py      Wed Jan 21 22:09:32 2015 -0500
>> > @@ -138,9 +138,10 @@
>> >  class CensoredNodeError(RevlogError):
>> >      """error raised when content verification fails on a censored
node"""
>>
>> This docstring might be polite and document that the CensoredNodeError
>> includes the delta that we refused to apply because it's for a
>> censored node. (Don't just use my wording though, unless that actually
>> was clear.)
>
>
> Great point, thanks - will include this in v2.
>
>>
>>
>> >
>> > -    def __init__(self, filename, node):
>> > +    def __init__(self, filename, node, metadata):
>> >          from node import short
>> >          RevlogError.__init__(self, '%s:%s' % (filename, short(node)))
>> > +        self.metadata = metadata
>> >
>> >  class CensoredBaseError(RevlogError):
>> >      """error raised when a delta is rejected because its base is
censored
>> > diff -r 1b3b6d629d6c -r a770441a605c mercurial/filelog.py
>> > --- a/mercurial/filelog.py    Wed Jan 21 17:11:37 2015 -0500
>> > +++ b/mercurial/filelog.py    Wed Jan 21 22:09:32 2015 -0500
>> > @@ -101,7 +101,7 @@
>> >              super(filelog, self).checkhash(text, p1, p2, node,
rev=rev)
>> >          except error.RevlogError:
>> >              if _censoredtext(text):
>> > -                raise error.CensoredNodeError(self.indexfile, node)
>> > +                raise error.CensoredNodeError(self.indexfile, node,
text)
>> >              raise
>> >
>> >      def iscensored(self, rev):
>> > _______________________________________________
>> > Mercurial-devel mailing list
>> > Mercurial-devel@selenic.com
>> > http://selenic.com/mailman/listinfo/mercurial-devel
>
>
>
>
> --
> Michael Edgar | Software Engineer | adgar@google.com | 518-496-6958
>

Patch

diff -r 1b3b6d629d6c -r a770441a605c mercurial/changegroup.py
--- a/mercurial/changegroup.py	Wed Jan 21 17:11:37 2015 -0500
+++ b/mercurial/changegroup.py	Wed Jan 21 22:09:32 2015 -0500
@@ -481,7 +481,17 @@ 
         base = self.deltaparent(revlog, rev, p1, p2, prev)
 
         prefix = ''
-        if base == nullrev:
+        if revlog.iscensored(base) or revlog.iscensored(rev):
+            try:
+                delta = revlog.revision(rev)
+            except error.CensoredNodeError, e:
+                delta = e.metadata
+            if base == nullrev:
+                prefix = mdiff.trivialdiffheader(len(delta))
+            else:
+                baselen = revlog.rawsize(base)
+                prefix = mdiff.replacediffheader(baselen, len(delta))
+        elif base == nullrev:
             delta = revlog.revision(node)
             prefix = mdiff.trivialdiffheader(len(delta))
         else:
diff -r 1b3b6d629d6c -r a770441a605c mercurial/error.py
--- a/mercurial/error.py	Wed Jan 21 17:11:37 2015 -0500
+++ b/mercurial/error.py	Wed Jan 21 22:09:32 2015 -0500
@@ -138,9 +138,10 @@ 
 class CensoredNodeError(RevlogError):
     """error raised when content verification fails on a censored node"""
 
-    def __init__(self, filename, node):
+    def __init__(self, filename, node, metadata):
         from node import short
         RevlogError.__init__(self, '%s:%s' % (filename, short(node)))
+        self.metadata = metadata
 
 class CensoredBaseError(RevlogError):
     """error raised when a delta is rejected because its base is censored
diff -r 1b3b6d629d6c -r a770441a605c mercurial/filelog.py
--- a/mercurial/filelog.py	Wed Jan 21 17:11:37 2015 -0500
+++ b/mercurial/filelog.py	Wed Jan 21 22:09:32 2015 -0500
@@ -101,7 +101,7 @@ 
             super(filelog, self).checkhash(text, p1, p2, node, rev=rev)
         except error.RevlogError:
             if _censoredtext(text):
-                raise error.CensoredNodeError(self.indexfile, node)
+                raise error.CensoredNodeError(self.indexfile, node, text)
             raise
 
     def iscensored(self, rev):