Patchwork [4,of,7,V2] transaction: add support for non-append files

login
register
mail settings
Submitter Durham Goode
Date March 31, 2014, 11:19 p.m.
Message ID <498a1087dd60ec7234c4.1396307986@dev2000.prn2.facebook.com>
Download mbox | patch
Permalink /patch/4170/
State Superseded
Commit 5dffd06f1e506b796f2d689ce11c1aa7393ad095
Headers show

Comments

Durham Goode - March 31, 2014, 11:19 p.m.
# HG changeset patch
# User Durham Goode <durham@fb.com>
# Date 1395699711 25200
#      Mon Mar 24 15:21:51 2014 -0700
# Node ID 498a1087dd60ec7234c4352e566abe977eb63332
# Parent  f85f9ea96d16e180de3e38cfc468e53441f41743
transaction: add support for non-append files

This adds support for normal, non-append-only files in transactions.  For
example, .hg/store/fncache and .hg/store/phaseroots should be written as part of
the transaction, but are not append only files.

This adds a journal.backupfiles along side the normal journal. This tracks which
files have been backed up as part of the transaction.  transaction.addbackup()
creates a backup of the file (using a hardlink), which is later used to recover
in the event of the transaction failing.

Using a seperate journal allows the repository to still be used by older
versions of Mercurial. A future patch will use this functionality and add tests
for it.
Pierre-Yves David - April 1, 2014, 12:45 a.m.
On 03/31/2014 04:19 PM, Durham Goode wrote:
> # HG changeset patch
> # User Durham Goode <durham@fb.com>
> # Date 1395699711 25200
> #      Mon Mar 24 15:21:51 2014 -0700
> # Node ID 498a1087dd60ec7234c4352e566abe977eb63332
> # Parent  f85f9ea96d16e180de3e38cfc468e53441f41743
> transaction: add support for non-append files
>
> This adds support for normal, non-append-only files in transactions.  For
> example, .hg/store/fncache and .hg/store/phaseroots should be written as part of
> the transaction, but are not append only files.
>
> This adds a journal.backupfiles along side the normal journal. This tracks which
> files have been backed up as part of the transaction.  transaction.addbackup()
> creates a backup of the file (using a hardlink), which is later used to recover
> in the event of the transaction failing.
>
> Using a seperate journal allows the repository to still be used by older
> versions of Mercurial. A future patch will use this functionality and add tests
> for it.

Reading this patches I've an hard to time understand if it would behave 
race free is two process are trying to rollback a transaction at the 
same time.


But the lock protection make it probably impossible.


> diff --git a/mercurial/transaction.py b/mercurial/transaction.py
> --- a/mercurial/transaction.py
> +++ b/mercurial/transaction.py
> @@ -12,8 +12,8 @@
>   # GNU General Public License version 2 or any later version.
>
>   from i18n import _
> -import errno
> -import error
> +import errno, os
> +import error, util
>
>   def active(func):
>       def _active(self, *args, **kwds):
> @@ -24,22 +24,35 @@
>       return _active
>
>   def _playback(journal, report, opener, entries, unlink=True):

A docstring explaining how the process happen would be great.


> +    backupfiles = []
>       for f, o, ignore in entries:
>           if o or not unlink:
> -            try:
> -                fp = opener(f, 'a')
> -                fp.truncate(o)
> -                fp.close()
> -            except IOError:
> -                report(_("failed to truncate %s\n") % f)
> -                raise
> +            if isinstance(o, int):
> +                try:
> +                    fp = opener(f, 'a')
> +                    fp.truncate(o)
> +                    fp.close()
> +                except IOError:
> +                    report(_("failed to truncate %s\n") % f)
> +                    raise
> +            else:
> +                fpath = opener.join(f)
> +                opath = opener.join(o)
> +                util.copyfile(opath, fpath)
> +                backupfiles.append(o)

Why do you use the same list entries for two different entry.

It looks like the transaction could have a fullfile entry with different 
content.

(Using isinstance is usually considered fragile and inelegant)



>           else:
>               try:
>                   opener.unlink(f)
>               except (IOError, OSError), inst:
>                   if inst.errno != errno.ENOENT:
>                       raise
> +
>       opener.unlink(journal)
> +    backuppath = "%s.backupfiles" % journal
> +    if opener.exists(backuppath):
> +        opener.unlink(backuppath)
> +    for f in backupfiles:
> +        opener.unlink(f)
>
>   class transaction(object):
>       def __init__(self, report, opener, journal, after=None, createmode=None,
> @@ -56,9 +69,12 @@
>           self.journal = journal
>           self._queue = []
>
> +        self.backupfilesjournal = "%s.backupfiles" % journal
>           self.file = opener.open(self.journal, "w")
> +        self.backupsfile = opener.open(self.backupfilesjournal, 'w')
>           if createmode is not None:
>               opener.chmod(self.journal, createmode & 0666)
> +            opener.chmod(self.backupfilesjournal, createmode & 0666)
>
>       def __del__(self):
>           if self.journal:
> @@ -71,11 +87,23 @@
>       @active
>       def endgroup(self):
>           q = self._queue.pop()
> -        d = ''.join(['%s\0%d\n' % (x[0], x[1]) for x in q])
>           self.entries.extend(q)
> +
> +        offsets = []
> +        backups = []
> +        for f, o, _ in q:
> +            if isinstance(o, int):
> +                offsets.append((f, o))
> +            else:
> +                backups.append((f, o))
> +        d = ''.join(['%s\0%d\n' % (f, o) for f, o in offsets])
>           self.file.write(d)
>           self.file.flush()
>
> +        d = ''.join(['%s\0%s\0' % (f, o) for f, o in backups])
> +        self.backupsfile.write(d)
> +        self.backupsfile.flush()
> +
>       @active
>       def add(self, file, offset, data=None):
>           if file in self.map:
> @@ -91,6 +119,27 @@
>           self.file.flush()
>
>       @active
> +    def addbackup(self, file):


A docstring would be nice


> +        if file in self.map:
> +            return
> +        backupfile = "journal.%s" % file
> +        if self.opener.exists(file):
> +            filepath = self.opener.join(file)
> +            backuppath = self.opener.join(backupfile)
> +            util.copyfiles(filepath, backuppath, hardlink=True)
> +        else:
> +            self.add(file, 0)
> +            return
> +
> +        if self._queue:
> +            self._queue[-1].append((file, backupfile))
> +            return
> +        self.entries.append((file, backupfile, None))
> +        self.map[file] = len(self.entries) - 1
> +        self.backupsfile.write("%s\0%s\0" % (file, backupfile))
> +        self.backupsfile.flush()
> +
> +    @active
>       def find(self, file):
>           if file in self.map:
>               return self.entries[self.map[file]]
> @@ -136,11 +185,16 @@
>           if self.count != 0:
>               return
>           self.file.close()
> -        self.entries = []
>           if self.after:
>               self.after()
>           if self.opener.isfile(self.journal):
>               self.opener.unlink(self.journal)
> +        if self.opener.isfile(self.backupfilesjournal):
> +            self.opener.unlink(self.backupfilesjournal)
> +            for f, o, _ in self.entries:
> +                if not isinstance(o, int):
> +                    self.opener.unlink(o)
> +        self.entries = []
>           self.journal = None
>
>       @active
> @@ -162,6 +216,7 @@
>               if not self.entries:
>                   if self.journal:
>                       self.opener.unlink(self.journal)
> +                    self.opener.unlink(self.backupfilesjournal)
>                   return
>
>               self.report(_("transaction abort!\n"))
> @@ -189,4 +244,14 @@
>           except ValueError:
>               report(_("couldn't read journal entry %r!\n") % l)
>
> +    backupjournal = "%s.backupfiles" % file
> +    if opener.exists(backupjournal):
> +        fp = opener.open(backupjournal)
> +        data = fp.read()
> +        if len(data) > 0:
> +            parts = data.split('\0')
> +            for i in xrange(0, len(parts), 2):
> +                f, o = parts[i:i + 1]
> +                entries.append((f, o, None))
> +
>       _playback(file, report, opener, entries)


Updating (or most probably creating :-() the dostring of this function 
with the on disk format would be great.
Durham Goode - April 1, 2014, 1:27 a.m.
On 3/31/14 5:45 PM, "Pierre-Yves David" <pierre-yves.david@ens-lyon.org>
wrote:

>
>
>On 03/31/2014 04:19 PM, Durham Goode wrote:
>> # HG changeset patch
>> # User Durham Goode <durham@fb.com>
>> # Date 1395699711 25200
>> #      Mon Mar 24 15:21:51 2014 -0700
>> # Node ID 498a1087dd60ec7234c4352e566abe977eb63332
>> # Parent  f85f9ea96d16e180de3e38cfc468e53441f41743
>> transaction: add support for non-append files
>>
>> This adds support for normal, non-append-only files in transactions.
>>For
>> example, .hg/store/fncache and .hg/store/phaseroots should be written
>>as part of
>> the transaction, but are not append only files.
>>
>> This adds a journal.backupfiles along side the normal journal. This
>>tracks which
>> files have been backed up as part of the transaction.
>>transaction.addbackup()
>> creates a backup of the file (using a hardlink), which is later used to
>>recover
>> in the event of the transaction failing.
>>
>> Using a seperate journal allows the repository to still be used by older
>> versions of Mercurial. A future patch will use this functionality and
>>add tests
>> for it.
>
>> +    backupfiles = []
>>       for f, o, ignore in entries:
>>           if o or not unlink:
>> -            try:
>> -                fp = opener(f, 'a')
>> -                fp.truncate(o)
>> -                fp.close()
>> -            except IOError:
>> -                report(_("failed to truncate %s\n") % f)
>> -                raise
>> +            if isinstance(o, int):
>> +                try:
>> +                    fp = opener(f, 'a')
>> +                    fp.truncate(o)
>> +                    fp.close()
>> +                except IOError:
>> +                    report(_("failed to truncate %s\n") % f)
>> +                    raise
>> +            else:
>> +                fpath = opener.join(f)
>> +                opath = opener.join(o)
>> +                util.copyfile(opath, fpath)
>> +                backupfiles.append(o)
>
>Why do you use the same list entries for two different entry.
>
>It looks like the transaction could have a fullfile entry with different
>content.
>
>(Using isinstance is usually considered fragile and inelegant)

I used the same list to minimize the amount of code churn and duplication
in this patch. It's mainly a problem because transactions have a notion of
'groups' that use this single list idea and would require a fair bit of
churn to make work with separate lists.

I'll implement your other suggestions though.

Patch

diff --git a/mercurial/transaction.py b/mercurial/transaction.py
--- a/mercurial/transaction.py
+++ b/mercurial/transaction.py
@@ -12,8 +12,8 @@ 
 # GNU General Public License version 2 or any later version.
 
 from i18n import _
-import errno
-import error
+import errno, os
+import error, util
 
 def active(func):
     def _active(self, *args, **kwds):
@@ -24,22 +24,35 @@ 
     return _active
 
 def _playback(journal, report, opener, entries, unlink=True):
+    backupfiles = []
     for f, o, ignore in entries:
         if o or not unlink:
-            try:
-                fp = opener(f, 'a')
-                fp.truncate(o)
-                fp.close()
-            except IOError:
-                report(_("failed to truncate %s\n") % f)
-                raise
+            if isinstance(o, int):
+                try:
+                    fp = opener(f, 'a')
+                    fp.truncate(o)
+                    fp.close()
+                except IOError:
+                    report(_("failed to truncate %s\n") % f)
+                    raise
+            else:
+                fpath = opener.join(f)
+                opath = opener.join(o)
+                util.copyfile(opath, fpath)
+                backupfiles.append(o)
         else:
             try:
                 opener.unlink(f)
             except (IOError, OSError), inst:
                 if inst.errno != errno.ENOENT:
                     raise
+
     opener.unlink(journal)
+    backuppath = "%s.backupfiles" % journal
+    if opener.exists(backuppath):
+        opener.unlink(backuppath)
+    for f in backupfiles:
+        opener.unlink(f)
 
 class transaction(object):
     def __init__(self, report, opener, journal, after=None, createmode=None,
@@ -56,9 +69,12 @@ 
         self.journal = journal
         self._queue = []
 
+        self.backupfilesjournal = "%s.backupfiles" % journal
         self.file = opener.open(self.journal, "w")
+        self.backupsfile = opener.open(self.backupfilesjournal, 'w')
         if createmode is not None:
             opener.chmod(self.journal, createmode & 0666)
+            opener.chmod(self.backupfilesjournal, createmode & 0666)
 
     def __del__(self):
         if self.journal:
@@ -71,11 +87,23 @@ 
     @active
     def endgroup(self):
         q = self._queue.pop()
-        d = ''.join(['%s\0%d\n' % (x[0], x[1]) for x in q])
         self.entries.extend(q)
+
+        offsets = []
+        backups = []
+        for f, o, _ in q:
+            if isinstance(o, int):
+                offsets.append((f, o))
+            else:
+                backups.append((f, o))
+        d = ''.join(['%s\0%d\n' % (f, o) for f, o in offsets])
         self.file.write(d)
         self.file.flush()
 
+        d = ''.join(['%s\0%s\0' % (f, o) for f, o in backups])
+        self.backupsfile.write(d)
+        self.backupsfile.flush()
+
     @active
     def add(self, file, offset, data=None):
         if file in self.map:
@@ -91,6 +119,27 @@ 
         self.file.flush()
 
     @active
+    def addbackup(self, file):
+        if file in self.map:
+            return
+        backupfile = "journal.%s" % file
+        if self.opener.exists(file):
+            filepath = self.opener.join(file)
+            backuppath = self.opener.join(backupfile)
+            util.copyfiles(filepath, backuppath, hardlink=True)
+        else:
+            self.add(file, 0)
+            return
+
+        if self._queue:
+            self._queue[-1].append((file, backupfile))
+            return
+        self.entries.append((file, backupfile, None))
+        self.map[file] = len(self.entries) - 1
+        self.backupsfile.write("%s\0%s\0" % (file, backupfile))
+        self.backupsfile.flush()
+
+    @active
     def find(self, file):
         if file in self.map:
             return self.entries[self.map[file]]
@@ -136,11 +185,16 @@ 
         if self.count != 0:
             return
         self.file.close()
-        self.entries = []
         if self.after:
             self.after()
         if self.opener.isfile(self.journal):
             self.opener.unlink(self.journal)
+        if self.opener.isfile(self.backupfilesjournal):
+            self.opener.unlink(self.backupfilesjournal)
+            for f, o, _ in self.entries:
+                if not isinstance(o, int):
+                    self.opener.unlink(o)
+        self.entries = []
         self.journal = None
 
     @active
@@ -162,6 +216,7 @@ 
             if not self.entries:
                 if self.journal:
                     self.opener.unlink(self.journal)
+                    self.opener.unlink(self.backupfilesjournal)
                 return
 
             self.report(_("transaction abort!\n"))
@@ -189,4 +244,14 @@ 
         except ValueError:
             report(_("couldn't read journal entry %r!\n") % l)
 
+    backupjournal = "%s.backupfiles" % file
+    if opener.exists(backupjournal):
+        fp = opener.open(backupjournal)
+        data = fp.read()
+        if len(data) > 0:
+            parts = data.split('\0')
+            for i in xrange(0, len(parts), 2):
+                f, o = parts[i:i + 1]
+                entries.append((f, o, None))
+
     _playback(file, report, opener, entries)