Submitter | Durham Goode |
---|---|
Date | March 31, 2014, 11:19 p.m. |
Message ID | <498a1087dd60ec7234c4.1396307986@dev2000.prn2.facebook.com> |
Download | mbox | patch |
Permalink | /patch/4170/ |
State | Superseded |
Commit | 5dffd06f1e506b796f2d689ce11c1aa7393ad095 |
Headers | show |
Comments
On 03/31/2014 04:19 PM, Durham Goode wrote: > # HG changeset patch > # User Durham Goode <durham@fb.com> > # Date 1395699711 25200 > # Mon Mar 24 15:21:51 2014 -0700 > # Node ID 498a1087dd60ec7234c4352e566abe977eb63332 > # Parent f85f9ea96d16e180de3e38cfc468e53441f41743 > transaction: add support for non-append files > > This adds support for normal, non-append-only files in transactions. For > example, .hg/store/fncache and .hg/store/phaseroots should be written as part of > the transaction, but are not append only files. > > This adds a journal.backupfiles along side the normal journal. This tracks which > files have been backed up as part of the transaction. transaction.addbackup() > creates a backup of the file (using a hardlink), which is later used to recover > in the event of the transaction failing. > > Using a seperate journal allows the repository to still be used by older > versions of Mercurial. A future patch will use this functionality and add tests > for it. Reading this patches I've an hard to time understand if it would behave race free is two process are trying to rollback a transaction at the same time. But the lock protection make it probably impossible. > diff --git a/mercurial/transaction.py b/mercurial/transaction.py > --- a/mercurial/transaction.py > +++ b/mercurial/transaction.py > @@ -12,8 +12,8 @@ > # GNU General Public License version 2 or any later version. > > from i18n import _ > -import errno > -import error > +import errno, os > +import error, util > > def active(func): > def _active(self, *args, **kwds): > @@ -24,22 +24,35 @@ > return _active > > def _playback(journal, report, opener, entries, unlink=True): A docstring explaining how the process happen would be great. > + backupfiles = [] > for f, o, ignore in entries: > if o or not unlink: > - try: > - fp = opener(f, 'a') > - fp.truncate(o) > - fp.close() > - except IOError: > - report(_("failed to truncate %s\n") % f) > - raise > + if isinstance(o, int): > + try: > + fp = opener(f, 'a') > + fp.truncate(o) > + fp.close() > + except IOError: > + report(_("failed to truncate %s\n") % f) > + raise > + else: > + fpath = opener.join(f) > + opath = opener.join(o) > + util.copyfile(opath, fpath) > + backupfiles.append(o) Why do you use the same list entries for two different entry. It looks like the transaction could have a fullfile entry with different content. (Using isinstance is usually considered fragile and inelegant) > else: > try: > opener.unlink(f) > except (IOError, OSError), inst: > if inst.errno != errno.ENOENT: > raise > + > opener.unlink(journal) > + backuppath = "%s.backupfiles" % journal > + if opener.exists(backuppath): > + opener.unlink(backuppath) > + for f in backupfiles: > + opener.unlink(f) > > class transaction(object): > def __init__(self, report, opener, journal, after=None, createmode=None, > @@ -56,9 +69,12 @@ > self.journal = journal > self._queue = [] > > + self.backupfilesjournal = "%s.backupfiles" % journal > self.file = opener.open(self.journal, "w") > + self.backupsfile = opener.open(self.backupfilesjournal, 'w') > if createmode is not None: > opener.chmod(self.journal, createmode & 0666) > + opener.chmod(self.backupfilesjournal, createmode & 0666) > > def __del__(self): > if self.journal: > @@ -71,11 +87,23 @@ > @active > def endgroup(self): > q = self._queue.pop() > - d = ''.join(['%s\0%d\n' % (x[0], x[1]) for x in q]) > self.entries.extend(q) > + > + offsets = [] > + backups = [] > + for f, o, _ in q: > + if isinstance(o, int): > + offsets.append((f, o)) > + else: > + backups.append((f, o)) > + d = ''.join(['%s\0%d\n' % (f, o) for f, o in offsets]) > self.file.write(d) > self.file.flush() > > + d = ''.join(['%s\0%s\0' % (f, o) for f, o in backups]) > + self.backupsfile.write(d) > + self.backupsfile.flush() > + > @active > def add(self, file, offset, data=None): > if file in self.map: > @@ -91,6 +119,27 @@ > self.file.flush() > > @active > + def addbackup(self, file): A docstring would be nice > + if file in self.map: > + return > + backupfile = "journal.%s" % file > + if self.opener.exists(file): > + filepath = self.opener.join(file) > + backuppath = self.opener.join(backupfile) > + util.copyfiles(filepath, backuppath, hardlink=True) > + else: > + self.add(file, 0) > + return > + > + if self._queue: > + self._queue[-1].append((file, backupfile)) > + return > + self.entries.append((file, backupfile, None)) > + self.map[file] = len(self.entries) - 1 > + self.backupsfile.write("%s\0%s\0" % (file, backupfile)) > + self.backupsfile.flush() > + > + @active > def find(self, file): > if file in self.map: > return self.entries[self.map[file]] > @@ -136,11 +185,16 @@ > if self.count != 0: > return > self.file.close() > - self.entries = [] > if self.after: > self.after() > if self.opener.isfile(self.journal): > self.opener.unlink(self.journal) > + if self.opener.isfile(self.backupfilesjournal): > + self.opener.unlink(self.backupfilesjournal) > + for f, o, _ in self.entries: > + if not isinstance(o, int): > + self.opener.unlink(o) > + self.entries = [] > self.journal = None > > @active > @@ -162,6 +216,7 @@ > if not self.entries: > if self.journal: > self.opener.unlink(self.journal) > + self.opener.unlink(self.backupfilesjournal) > return > > self.report(_("transaction abort!\n")) > @@ -189,4 +244,14 @@ > except ValueError: > report(_("couldn't read journal entry %r!\n") % l) > > + backupjournal = "%s.backupfiles" % file > + if opener.exists(backupjournal): > + fp = opener.open(backupjournal) > + data = fp.read() > + if len(data) > 0: > + parts = data.split('\0') > + for i in xrange(0, len(parts), 2): > + f, o = parts[i:i + 1] > + entries.append((f, o, None)) > + > _playback(file, report, opener, entries) Updating (or most probably creating :-() the dostring of this function with the on disk format would be great.
On 3/31/14 5:45 PM, "Pierre-Yves David" <pierre-yves.david@ens-lyon.org> wrote: > > >On 03/31/2014 04:19 PM, Durham Goode wrote: >> # HG changeset patch >> # User Durham Goode <durham@fb.com> >> # Date 1395699711 25200 >> # Mon Mar 24 15:21:51 2014 -0700 >> # Node ID 498a1087dd60ec7234c4352e566abe977eb63332 >> # Parent f85f9ea96d16e180de3e38cfc468e53441f41743 >> transaction: add support for non-append files >> >> This adds support for normal, non-append-only files in transactions. >>For >> example, .hg/store/fncache and .hg/store/phaseroots should be written >>as part of >> the transaction, but are not append only files. >> >> This adds a journal.backupfiles along side the normal journal. This >>tracks which >> files have been backed up as part of the transaction. >>transaction.addbackup() >> creates a backup of the file (using a hardlink), which is later used to >>recover >> in the event of the transaction failing. >> >> Using a seperate journal allows the repository to still be used by older >> versions of Mercurial. A future patch will use this functionality and >>add tests >> for it. > >> + backupfiles = [] >> for f, o, ignore in entries: >> if o or not unlink: >> - try: >> - fp = opener(f, 'a') >> - fp.truncate(o) >> - fp.close() >> - except IOError: >> - report(_("failed to truncate %s\n") % f) >> - raise >> + if isinstance(o, int): >> + try: >> + fp = opener(f, 'a') >> + fp.truncate(o) >> + fp.close() >> + except IOError: >> + report(_("failed to truncate %s\n") % f) >> + raise >> + else: >> + fpath = opener.join(f) >> + opath = opener.join(o) >> + util.copyfile(opath, fpath) >> + backupfiles.append(o) > >Why do you use the same list entries for two different entry. > >It looks like the transaction could have a fullfile entry with different >content. > >(Using isinstance is usually considered fragile and inelegant) I used the same list to minimize the amount of code churn and duplication in this patch. It's mainly a problem because transactions have a notion of 'groups' that use this single list idea and would require a fair bit of churn to make work with separate lists. I'll implement your other suggestions though.
Patch
diff --git a/mercurial/transaction.py b/mercurial/transaction.py --- a/mercurial/transaction.py +++ b/mercurial/transaction.py @@ -12,8 +12,8 @@ # GNU General Public License version 2 or any later version. from i18n import _ -import errno -import error +import errno, os +import error, util def active(func): def _active(self, *args, **kwds): @@ -24,22 +24,35 @@ return _active def _playback(journal, report, opener, entries, unlink=True): + backupfiles = [] for f, o, ignore in entries: if o or not unlink: - try: - fp = opener(f, 'a') - fp.truncate(o) - fp.close() - except IOError: - report(_("failed to truncate %s\n") % f) - raise + if isinstance(o, int): + try: + fp = opener(f, 'a') + fp.truncate(o) + fp.close() + except IOError: + report(_("failed to truncate %s\n") % f) + raise + else: + fpath = opener.join(f) + opath = opener.join(o) + util.copyfile(opath, fpath) + backupfiles.append(o) else: try: opener.unlink(f) except (IOError, OSError), inst: if inst.errno != errno.ENOENT: raise + opener.unlink(journal) + backuppath = "%s.backupfiles" % journal + if opener.exists(backuppath): + opener.unlink(backuppath) + for f in backupfiles: + opener.unlink(f) class transaction(object): def __init__(self, report, opener, journal, after=None, createmode=None, @@ -56,9 +69,12 @@ self.journal = journal self._queue = [] + self.backupfilesjournal = "%s.backupfiles" % journal self.file = opener.open(self.journal, "w") + self.backupsfile = opener.open(self.backupfilesjournal, 'w') if createmode is not None: opener.chmod(self.journal, createmode & 0666) + opener.chmod(self.backupfilesjournal, createmode & 0666) def __del__(self): if self.journal: @@ -71,11 +87,23 @@ @active def endgroup(self): q = self._queue.pop() - d = ''.join(['%s\0%d\n' % (x[0], x[1]) for x in q]) self.entries.extend(q) + + offsets = [] + backups = [] + for f, o, _ in q: + if isinstance(o, int): + offsets.append((f, o)) + else: + backups.append((f, o)) + d = ''.join(['%s\0%d\n' % (f, o) for f, o in offsets]) self.file.write(d) self.file.flush() + d = ''.join(['%s\0%s\0' % (f, o) for f, o in backups]) + self.backupsfile.write(d) + self.backupsfile.flush() + @active def add(self, file, offset, data=None): if file in self.map: @@ -91,6 +119,27 @@ self.file.flush() @active + def addbackup(self, file): + if file in self.map: + return + backupfile = "journal.%s" % file + if self.opener.exists(file): + filepath = self.opener.join(file) + backuppath = self.opener.join(backupfile) + util.copyfiles(filepath, backuppath, hardlink=True) + else: + self.add(file, 0) + return + + if self._queue: + self._queue[-1].append((file, backupfile)) + return + self.entries.append((file, backupfile, None)) + self.map[file] = len(self.entries) - 1 + self.backupsfile.write("%s\0%s\0" % (file, backupfile)) + self.backupsfile.flush() + + @active def find(self, file): if file in self.map: return self.entries[self.map[file]] @@ -136,11 +185,16 @@ if self.count != 0: return self.file.close() - self.entries = [] if self.after: self.after() if self.opener.isfile(self.journal): self.opener.unlink(self.journal) + if self.opener.isfile(self.backupfilesjournal): + self.opener.unlink(self.backupfilesjournal) + for f, o, _ in self.entries: + if not isinstance(o, int): + self.opener.unlink(o) + self.entries = [] self.journal = None @active @@ -162,6 +216,7 @@ if not self.entries: if self.journal: self.opener.unlink(self.journal) + self.opener.unlink(self.backupfilesjournal) return self.report(_("transaction abort!\n")) @@ -189,4 +244,14 @@ except ValueError: report(_("couldn't read journal entry %r!\n") % l) + backupjournal = "%s.backupfiles" % file + if opener.exists(backupjournal): + fp = opener.open(backupjournal) + data = fp.read() + if len(data) > 0: + parts = data.split('\0') + for i in xrange(0, len(parts), 2): + f, o = parts[i:i + 1] + entries.append((f, o, None)) + _playback(file, report, opener, entries)