Patchwork [1,of,5] changelog: rework the delayupdate mechanism

login
register
mail settings
Submitter Pierre-Yves David
Date Nov. 2, 2014, 2:20 p.m.
Message ID <a438ab1f5765f6709f7d.1414938039@marginatus.alto.octopoid.net>
Download mbox | patch
Permalink /patch/6525/
State Accepted
Headers show

Comments

Pierre-Yves David - Nov. 2, 2014, 2:20 p.m.
# HG changeset patch
# User Pierre-Yves David <pierre-yves.david@fb.com>
# Date 1413619938 25200
#      Sat Oct 18 01:12:18 2014 -0700
# Node ID a438ab1f5765f6709f7d62b6b9dd92eda3e34730
# Parent  cc1cbb0bba8ed1d95c8f1b8e27d4d2893e0dcca7
changelog: rework the delayupdate mechanism

The current way we use the `delayupdate` mechanism is wrong. We call
`delayupdate` right after the transaction retrieval, then we call `finalize`
right before calling `tr.close()`. The `finalize` call will -always- result in a
flush to disk, making the data available to all readers. But the `tr.close()` may
be a no-op if the transaction is nested. This would result in data:

1) exposed to reader to early,
2) rollbacked by other part of the transaction after such exposition.

So we need to end up in a situation where we call `finalize` a single time when
the transaction actually close. For this purpose we need to be able to call
`delayupdate` and `_writepending` multiple times and `finalize` once. This was
not possible with the previous state of the code.

This changeset re-factor the code to makes this possible. We buffer data in memory
as much as possible and fall-back to writing to a ".a" file after the first call
to `_writepending`.
Pierre-Yves David - Nov. 5, 2014, 4:06 p.m.
On 11/02/2014 02:20 PM, Pierre-Yves David wrote:
> # HG changeset patch
> # User Pierre-Yves David <pierre-yves.david@fb.com>
> # Date 1413619938 25200
> #      Sat Oct 18 01:12:18 2014 -0700
> # Node ID a438ab1f5765f6709f7d62b6b9dd92eda3e34730
> # Parent  cc1cbb0bba8ed1d95c8f1b8e27d4d2893e0dcca7
> changelog: rework the delayupdate mechanism

Any new on this series? This is the first in a long series and fixing 
transaction is my current main priority.
Matt Mackall - Nov. 5, 2014, 4:38 p.m.
On Wed, 2014-11-05 at 16:06 +0000, Pierre-Yves David wrote:
> 
> On 11/02/2014 02:20 PM, Pierre-Yves David wrote:
> > # HG changeset patch
> > # User Pierre-Yves David <pierre-yves.david@fb.com>
> > # Date 1413619938 25200
> > #      Sat Oct 18 01:12:18 2014 -0700
> > # Node ID a438ab1f5765f6709f7d62b6b9dd92eda3e34730
> > # Parent  cc1cbb0bba8ed1d95c8f1b8e27d4d2893e0dcca7
> > changelog: rework the delayupdate mechanism
> 
> Any new on this series? This is the first in a long series and fixing 
> transaction is my current main priority.

http://mercurial.selenic.com/wiki/mpm/flow

Patch

diff --git a/mercurial/changelog.py b/mercurial/changelog.py
--- a/mercurial/changelog.py
+++ b/mercurial/changelog.py
@@ -106,30 +106,36 @@  class appender(object):
 
     def write(self, s):
         self.data.append(str(s))
         self.offset += len(s)
 
-def delayopener(opener, target, divert, buf):
-    def o(name, mode='r'):
+def _divertopener(opener, target):
+    """build an opener that write in `target.a` instead of `target`"""
+    def _divert(name, mode='r'):
         if name != target:
             return opener(name, mode)
-        if divert:
-            return opener(name + ".a", mode.replace('a', 'w'))
-        # otherwise, divert to memory
+        return opener(name + ".a", mode)
+    return _divert
+
+def _delayopener(opener, target, buf):
+    """build an opener that store chunk in `buf` instead of `target`"""
+    def _delay(name, mode='r'):
+        if name != target:
+            return opener(name, mode)
         return appender(opener, name, mode, buf)
-    return o
+    return _delay
 
 class changelog(revlog.revlog):
     def __init__(self, opener):
         revlog.revlog.__init__(self, opener, "00changelog.i")
         if self._initempty:
             # changelogs don't benefit from generaldelta
             self.version &= ~revlog.REVLOGGENERALDELTA
             self._generaldelta = False
         self._realopener = opener
         self._delayed = False
-        self._delaybuf = []
+        self._delaybuf = None
         self._divert = False
         self.filteredrevs = frozenset()
 
     def tip(self):
         """filtered version of revlog.tip"""
@@ -218,31 +224,40 @@  class changelog(revlog.revlog):
             raise error.FilteredIndexError(rev)
         return super(changelog, self).flags(rev)
 
     def delayupdate(self):
         "delay visibility of index updates to other readers"
+
+        if not self._delayed:
+            if len(self) == 0:
+                self._divert = True
+                if self._realopener.exists(self.indexfile + '.a'):
+                    self._realopener.unlink(self.indexfile + '.a')
+                self.opener = _divertopener(self._realopener, self.indexfile)
+            else:
+                self._delaybuf = []
+                self.opener = _delayopener(self._realopener, self.indexfile,
+                                           self._delaybuf)
         self._delayed = True
-        self._divert = (len(self) == 0)
-        self._delaybuf = []
-        self.opener = delayopener(self._realopener, self.indexfile,
-                                  self._divert, self._delaybuf)
 
     def finalize(self, tr):
         "finalize index updates"
         self._delayed = False
         self.opener = self._realopener
         # move redirected index data back into place
         if self._divert:
+            assert not self._delaybuf
             tmpname = self.indexfile + ".a"
             nfile = self.opener.open(tmpname)
             nfile.close()
             self.opener.rename(tmpname, self.indexfile)
         elif self._delaybuf:
             fp = self.opener(self.indexfile, 'a')
             fp.write("".join(self._delaybuf))
             fp.close()
-            self._delaybuf = []
+            self._delaybuf = None
+        self._divert = False
         # split when we're done
         self.checkinlinesize(tr)
 
     def readpending(self, file):
         r = revlog.revlog(self.opener, file)
@@ -260,12 +275,13 @@  class changelog(revlog.revlog):
             fp2.write(fp1.read())
             # add pending data
             fp2.write("".join(self._delaybuf))
             fp2.close()
             # switch modes so finalize can simply rename
-            self._delaybuf = []
+            self._delaybuf = None
             self._divert = True
+            self.opener = _divertopener(self._realopener, self.indexfile)
 
         if self._divert:
             return True
 
         return False