Patchwork [1,of,2] histedit: replace pickle with custom serialization

login
register
mail settings
Submitter Durham Goode
Date April 14, 2015, 5:06 p.m.
Message ID <6289acbbd36ec81eab91.1429031214@dev2000.prn2.facebook.com>
Download mbox | patch
Permalink /patch/8652/
State Accepted
Delegated to: Augie Fackler
Headers show

Comments

Durham Goode - April 14, 2015, 5:06 p.m.
# HG changeset patch
# User Durham Goode <durham@fb.com>
# Date 1428938637 25200
#      Mon Apr 13 08:23:57 2015 -0700
# Node ID 6289acbbd36ec81eab91ed3b8e32b3354bf8cf84
# Parent  b2fb1403994e033584aed8a487ab162a9d75fa80
histedit: replace pickle with custom serialization

Pickle is undesirable, so let's serialize it ourselves. We keep the ability to
parse existing pickle blobs for now.
Ryan McElroy - April 14, 2015, 5:50 p.m.
On 4/14/2015 10:06 AM, Durham Goode wrote:
> # HG changeset patch
> # User Durham Goode <durham@fb.com>
> # Date 1428938637 25200
> #      Mon Apr 13 08:23:57 2015 -0700
> # Node ID 6289acbbd36ec81eab91ed3b8e32b3354bf8cf84
> # Parent  b2fb1403994e033584aed8a487ab162a9d75fa80
> histedit: replace pickle with custom serialization
>
> Pickle is undesirable, so let's serialize it ourselves. We keep the ability to
> parse existing pickle blobs for now.
>
> diff --git a/hgext/histedit.py b/hgext/histedit.py
> --- a/hgext/histedit.py
> +++ b/hgext/histedit.py
> @@ -220,7 +220,12 @@ class histeditstate(object):
>                   raise
>               raise util.Abort(_('no histedit in progress'))
>   
> -        parentctxnode, rules, keep, topmost, replacements = pickle.load(fp)
> +        try:
> +            data = pickle.load(fp)
> +            parentctxnode, rules, keep, topmost, replacements = data
> +        except pickle.UnpicklingError:
> +            data = self._load()
> +            parentctxnode, rules, keep, topmost, replacements = data
>   
>           self.parentctxnode = parentctxnode
>           self.rules = rules
> @@ -230,10 +235,63 @@ class histeditstate(object):
>   
>       def write(self):
>           fp = self.repo.vfs('histedit-state', 'w')
> -        pickle.dump((self.parentctxnode, self.rules, self.keep,
> -                     self.topmost, self.replacements), fp)
> +        fp.write('v1\n')
> +        fp.write('%s\n' % node.hex(self.parentctxnode))
> +        fp.write('%s\n' % node.hex(self.topmost))
> +        fp.write('%s\n' % self.keep)
> +        fp.write('%d\n' % len(self.rules))
> +        for rule in self.rules:
> +            fp.write('%s%s\n' % (rule[1], rule[0]))
I'd prefer rules to be in their own file, so it's easy to edit even 
without --edit-todo.

I'm fine having that in a later change though.

That would also eliminate the need for printing the number of lines into 
the file.
> +        fp.write('%d\n' % len(self.replacements))
> +        for replacement in self.replacements:
> +            fp.write('%s%s\n' % (node.hex(replacement[0]), ''.join(node.hex(r)
> +                for r in replacement[1])))
>           fp.close()
>   
> +    def _load(self):
> +        fp = self.repo.vfs('histedit-state', 'r')
> +        lines = [l[:-1] for l in fp.readlines()]
> +
> +        index = 0
> +        lines[index] # version number

Why have this line if you're not reading it or using it? Perhaps just 
comment it out for documentation?

Or even better, make this forwards-compatible. Look for a version that's 
not v1 and abort with a useful error message like "this histedit was 
started with a newer version of mercurial that uses a file format this 
version of mercurial does not know how to read. Please continue this 
histedit with the newer version of mercurial that started this histedit".

Also, bonus points if you can include in the message version of 
mercurial that knows how to to parse the history. Consider, for example, 
a version line that looks like this:

v1 # 3.4.1+123

That way, the message can show what version of mercurial might be used 
here to make things better.

> +        index += 1
> +
> +        parentctxnode = node.bin(lines[index])
> +        index += 1
> +
> +        topmost = node.bin(lines[index])
> +        index += 1
> +
> +        keep = lines[index] == 'True'
> +        index += 1
> +
> +        # Rules
> +        rules = []
> +        rulelen = int(lines[index])
> +        index += 1
> +        for i in xrange(rulelen):
> +            rule = lines[index]
> +            rulehash = rule[:40]
> +            ruleaction = rule[40:]
> +            rules.append((ruleaction, rulehash))
> +            index += 1
> +
> +        # Replacements
> +        replacements = []
> +        replacementlen = int(lines[index])
> +        index += 1
> +        for i in xrange(replacementlen):
> +            replacement = lines[index]
> +            original = node.bin(replacement[:40])
> +            succ = [node.bin(replacement[i:i + 40]) for i in
> +                    range(40, len(replacement), 40)]
> +            replacements.append((original, succ))
> +            index += 1
> +
> +        fp.close()
> +
> +        return parentctxnode, rules, keep, topmost, replacements
> +
>       def clear(self):
>           self.repo.vfs.unlink('histedit-state')
>   
>
Pierre-Yves David - April 14, 2015, 6:14 p.m.
On 04/14/2015 01:50 PM, Ryan McElroy wrote:
> On 4/14/2015 10:06 AM, Durham Goode wrote:
>> # HG changeset patch
>> # User Durham Goode <durham@fb.com>
>> # Date 1428938637 25200
>> #      Mon Apr 13 08:23:57 2015 -0700
>> # Node ID 6289acbbd36ec81eab91ed3b8e32b3354bf8cf84
>> # Parent  b2fb1403994e033584aed8a487ab162a9d75fa80
>> histedit: replace pickle with custom serialization
>>
>> Pickle is undesirable, so let's serialize it ourselves. We keep the
>> ability to
>> parse existing pickle blobs for now.
>>
>> diff --git a/hgext/histedit.py b/hgext/histedit.py
>> --- a/hgext/histedit.py
>> +++ b/hgext/histedit.py
>> @@ -220,7 +220,12 @@ class histeditstate(object):
>>                   raise
>>               raise util.Abort(_('no histedit in progress'))
>> -        parentctxnode, rules, keep, topmost, replacements =
>> pickle.load(fp)
>> +        try:
>> +            data = pickle.load(fp)
>> +            parentctxnode, rules, keep, topmost, replacements = data
>> +        except pickle.UnpicklingError:
>> +            data = self._load()
>> +            parentctxnode, rules, keep, topmost, replacements = data
>>           self.parentctxnode = parentctxnode
>>           self.rules = rules
>> @@ -230,10 +235,63 @@ class histeditstate(object):
>>       def write(self):
>>           fp = self.repo.vfs('histedit-state', 'w')
>> -        pickle.dump((self.parentctxnode, self.rules, self.keep,
>> -                     self.topmost, self.replacements), fp)
>> +        fp.write('v1\n')
>> +        fp.write('%s\n' % node.hex(self.parentctxnode))
>> +        fp.write('%s\n' % node.hex(self.topmost))
>> +        fp.write('%s\n' % self.keep)
>> +        fp.write('%d\n' % len(self.rules))
>> +        for rule in self.rules:
>> +            fp.write('%s%s\n' % (rule[1], rule[0]))
> I'd prefer rules to be in their own file, so it's easy to edit even
> without --edit-todo.
>
> I'm fine having that in a later change though.
>
> That would also eliminate the need for printing the number of lines into
> the file.

This looks like a smart proposal, so we should probably not do it. This 
kind of open the way to all kind of out of control black magic 
witchcraft from user that will hinter our ability to make things better 
in the future.
Augie Fackler - April 15, 2015, 3:06 p.m.
On Tue, Apr 14, 2015 at 02:14:46PM -0400, Pierre-Yves David wrote:
>
>
> On 04/14/2015 01:50 PM, Ryan McElroy wrote:
> >On 4/14/2015 10:06 AM, Durham Goode wrote:
> >># HG changeset patch
> >># User Durham Goode <durham@fb.com>
> >># Date 1428938637 25200
> >>#      Mon Apr 13 08:23:57 2015 -0700
> >># Node ID 6289acbbd36ec81eab91ed3b8e32b3354bf8cf84
> >># Parent  b2fb1403994e033584aed8a487ab162a9d75fa80
> >>histedit: replace pickle with custom serialization
> >>
> >>Pickle is undesirable, so let's serialize it ourselves. We keep the
> >>ability to
> >>parse existing pickle blobs for now.
> >>
> >>diff --git a/hgext/histedit.py b/hgext/histedit.py
> >>--- a/hgext/histedit.py
> >>+++ b/hgext/histedit.py
> >>@@ -220,7 +220,12 @@ class histeditstate(object):
> >>                  raise
> >>              raise util.Abort(_('no histedit in progress'))
> >>-        parentctxnode, rules, keep, topmost, replacements =
> >>pickle.load(fp)
> >>+        try:
> >>+            data = pickle.load(fp)
> >>+            parentctxnode, rules, keep, topmost, replacements = data
> >>+        except pickle.UnpicklingError:
> >>+            data = self._load()
> >>+            parentctxnode, rules, keep, topmost, replacements = data
> >>          self.parentctxnode = parentctxnode
> >>          self.rules = rules
> >>@@ -230,10 +235,63 @@ class histeditstate(object):
> >>      def write(self):
> >>          fp = self.repo.vfs('histedit-state', 'w')
> >>-        pickle.dump((self.parentctxnode, self.rules, self.keep,
> >>-                     self.topmost, self.replacements), fp)
> >>+        fp.write('v1\n')
> >>+        fp.write('%s\n' % node.hex(self.parentctxnode))
> >>+        fp.write('%s\n' % node.hex(self.topmost))
> >>+        fp.write('%s\n' % self.keep)
> >>+        fp.write('%d\n' % len(self.rules))
> >>+        for rule in self.rules:
> >>+            fp.write('%s%s\n' % (rule[1], rule[0]))
> >I'd prefer rules to be in their own file, so it's easy to edit even
> >without --edit-todo.
> >
> >I'm fine having that in a later change though.
> >
> >That would also eliminate the need for printing the number of lines into
> >the file.
>
> This looks like a smart proposal, so we should probably not do it. This kind
> of open the way to all kind of out of control black magic witchcraft from
> user that will hinter our ability to make things better in the future.

Yes, I was going to make precisely this point. If we let users do
things behind our back, it'll be harder to do smart things in the
future.

>
>
> --
> Pierre-Yves David
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel

Patch

diff --git a/hgext/histedit.py b/hgext/histedit.py
--- a/hgext/histedit.py
+++ b/hgext/histedit.py
@@ -220,7 +220,12 @@  class histeditstate(object):
                 raise
             raise util.Abort(_('no histedit in progress'))
 
-        parentctxnode, rules, keep, topmost, replacements = pickle.load(fp)
+        try:
+            data = pickle.load(fp)
+            parentctxnode, rules, keep, topmost, replacements = data
+        except pickle.UnpicklingError:
+            data = self._load()
+            parentctxnode, rules, keep, topmost, replacements = data
 
         self.parentctxnode = parentctxnode
         self.rules = rules
@@ -230,10 +235,63 @@  class histeditstate(object):
 
     def write(self):
         fp = self.repo.vfs('histedit-state', 'w')
-        pickle.dump((self.parentctxnode, self.rules, self.keep,
-                     self.topmost, self.replacements), fp)
+        fp.write('v1\n')
+        fp.write('%s\n' % node.hex(self.parentctxnode))
+        fp.write('%s\n' % node.hex(self.topmost))
+        fp.write('%s\n' % self.keep)
+        fp.write('%d\n' % len(self.rules))
+        for rule in self.rules:
+            fp.write('%s%s\n' % (rule[1], rule[0]))
+        fp.write('%d\n' % len(self.replacements))
+        for replacement in self.replacements:
+            fp.write('%s%s\n' % (node.hex(replacement[0]), ''.join(node.hex(r)
+                for r in replacement[1])))
         fp.close()
 
+    def _load(self):
+        fp = self.repo.vfs('histedit-state', 'r')
+        lines = [l[:-1] for l in fp.readlines()]
+
+        index = 0
+        lines[index] # version number
+        index += 1
+
+        parentctxnode = node.bin(lines[index])
+        index += 1
+
+        topmost = node.bin(lines[index])
+        index += 1
+
+        keep = lines[index] == 'True'
+        index += 1
+
+        # Rules
+        rules = []
+        rulelen = int(lines[index])
+        index += 1
+        for i in xrange(rulelen):
+            rule = lines[index]
+            rulehash = rule[:40]
+            ruleaction = rule[40:]
+            rules.append((ruleaction, rulehash))
+            index += 1
+
+        # Replacements
+        replacements = []
+        replacementlen = int(lines[index])
+        index += 1
+        for i in xrange(replacementlen):
+            replacement = lines[index]
+            original = node.bin(replacement[:40])
+            succ = [node.bin(replacement[i:i + 40]) for i in
+                    range(40, len(replacement), 40)]
+            replacements.append((original, succ))
+            index += 1
+
+        fp.close()
+
+        return parentctxnode, rules, keep, topmost, replacements
+
     def clear(self):
         self.repo.vfs.unlink('histedit-state')