Patchwork [3,of,3] cache: rebuild branch cache from scratch when inconsistencies are detected

login
register
mail settings
Submitter timeless
Date March 13, 2016, 2:08 a.m.
Message ID <CAAKMeYg+3-jr-GL0mG1Z2kXxcti3Ccgt8083MED1AFFeXL53Hg@mail.gmail.com>
Download mbox | patch
Permalink /patch/13849/
State Accepted
Commit bcd106d456c4168ff5e5a02f4352b92e9a310e9d
Headers show

Comments

timeless - March 13, 2016, 2:08 a.m.
Could you please add issue5058
# HG changeset patch
# User Mads Kiilerich <madski@unity3d.com>
# Date 1457831183 -3600
#      Sun Mar 13 02:06:23 2016 +0100
# Node ID 7f85c3393e4594a886b535f0679d1abca37f4604
# Parent  b8a5abd4a18ee070b922d03e5c14d63df504b78a
cache: rebuild branch cache from scratch when inconsistencies are detected

This should recover automatically from some corruptions that for unknown
reasons are seen in the wild.
Mads Kiilerich - March 13, 2016, 2:43 a.m.
On 03/13/2016 03:08 AM, timeless wrote:
>
> Could you please add issue5058
>

It was a spin-off from that issue and is somewhat related to it. The 
patches here might mitigate the problem but I don't think the real 
problem in issue5058 has been understood or solved. That is why I didn't 
add the issue. But no strong opinion on that ;-)

/Mads

> # HG changeset patch
> # User Mads Kiilerich <madski@unity3d.com <mailto:madski@unity3d.com>>
> # Date 1457831183 -3600
> #      Sun Mar 13 02:06:23 2016 +0100
> # Node ID 7f85c3393e4594a886b535f0679d1abca37f4604
> # Parent  b8a5abd4a18ee070b922d03e5c14d63df504b78a
> cache: rebuild branch cache from scratch when inconsistencies are detected
>
> This should recover automatically from some corruptions that for unknown
> reasons are seen in the wild.
>
> diff --git a/mercurial/branchmap.py b/mercurial/branchmap.py
> --- a/mercurial/branchmap.py
> +++ b/mercurial/branchmap.py
> @@ -383,6 +383,15 @@ class revbranchcache(object):
>          self._rbcnamescount = len(self._names) # number of good names on disk
>          self._namesreverse = dict((b, r) for r, b in enumerate(self._names))
>
> +    def _clear(self):
> +        self._rbcsnameslen = 0
> +        del self._names[:]
> +        self._rbcnamescount = 0
> +        self._namesreverse.clear()
> +        self._rbcrevslen = len(self._repo.changelog)
> +        self._rbcrevs = array('c')
> +        self._rbcrevs.fromstring('\0' * (self._rbcrevslen * _rbcrecsize))
> +
>      def branchinfo(self, rev):
>          """Return branch name and close flag for rev, using and updating
>          persistent cache."""
> @@ -408,7 +417,11 @@ class revbranchcache(object):
>          if cachenode == '\0\0\0\0':
>              pass
>          elif cachenode == reponode:
> -            return self._names[branchidx], close
> +            if branchidx < self._rbcnamescount:
> +                return self._names[branchidx], close
> +            # referenced branch doesn't exist - rebuild is expensive but needed
> +            self._repo.ui.debug("rebuilding corrupted revision branch cache\n")
> +            self._clear()
>          else:
>              # rev/node map has changed, invalidate the cache from here up
>              truncate = rbcrevidx + _rbcrecsize
> diff --git a/tests/test-branches.t b/tests/test-branches.t
> --- a/tests/test-branches.t
> +++ b/tests/test-branches.t
> @@ -629,4 +629,25 @@ situation where the cache is out of sync
>    $ f --size .hg/cache/rbc-revs*
>    .hg/cache/rbc-revs-v1: size=112
>
> +cache is rebuilt when corruption is detected
> +  $ echo > .hg/cache/rbc-names-v1
> +  $ hg log -r '5:&branch(.)' -T '{rev} ' --debug
> +  rebuilding corrupted revision branch cache
> +  8 9 10 11 12 13 truncating cache/rbc-revs-v1 to 40
> +  $ f --size --hexdump .hg/cache/rbc-*
> +  .hg/cache/rbc-names-v1: size=79
> +  0000: 62 00 61 00 63 00 61 20 62 72 61 6e 63 68 20 6e |b.a.c.a branch n|
> +  0010: 61 6d 65 20 6d 75 63 68 20 6c 6f 6e 67 65 72 20 |ame much longer |
> +  0020: 74 68 61 6e 20 74 68 65 20 64 65 66 61 75 6c 74 |than the default|
> +  0030: 20 6a 75 73 74 69 66 69 63 61 74 69 6f 6e 20 75 | justification u|
> +  0040: 73 65 64 20 62 79 20 62 72 61 6e 63 68 65 73    |sed by branches|
> +  .hg/cache/rbc-revs-v1: size=112
> +  0000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
> +  0010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
> +  0020: 00 00 00 00 00 00 00 00 d8 cb c6 1d 00 00 00 01 |................|
> +  0030: 58 97 36 a2 00 00 00 02 10 ff 58 95 00 00 00 03 |X.6.......X.....|
> +  0040: ee bb 94 44 00 00 00 00 5f 40 61 bb 00 00 00 00 |...D...._@a.....|
> +  0050: bf be 84 1b 00 00 00 00 d3 f1 63 45 80 00 00 00 |..........cE....|
> +  0060: e3 d4 9c 05 80 00 00 00 e2 3b 55 05 00 00 00 00 |.........;U.....|
> +
>    $ cd ..
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@mercurial-scm.org 
> <mailto:Mercurial-devel@mercurial-scm.org>
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel

Patch

diff --git a/mercurial/branchmap.py b/mercurial/branchmap.py
--- a/mercurial/branchmap.py
+++ b/mercurial/branchmap.py
@@ -383,6 +383,15 @@  class revbranchcache(object):
         self._rbcnamescount = len(self._names) # number of good names on disk
         self._namesreverse = dict((b, r) for r, b in enumerate(self._names))

+    def _clear(self):
+        self._rbcsnameslen = 0
+        del self._names[:]
+        self._rbcnamescount = 0
+        self._namesreverse.clear()
+        self._rbcrevslen = len(self._repo.changelog)
+        self._rbcrevs = array('c')
+        self._rbcrevs.fromstring('\0' * (self._rbcrevslen * _rbcrecsize))
+
     def branchinfo(self, rev):
         """Return branch name and close flag for rev, using and updating
         persistent cache."""
@@ -408,7 +417,11 @@  class revbranchcache(object):
         if cachenode == '\0\0\0\0':
             pass
         elif cachenode == reponode:
-            return self._names[branchidx], close
+            if branchidx < self._rbcnamescount:
+                return self._names[branchidx], close
+            # referenced branch doesn't exist - rebuild is expensive but needed
+            self._repo.ui.debug("rebuilding corrupted revision branch cache\n")
+            self._clear()
         else:
             # rev/node map has changed, invalidate the cache from here up
             truncate = rbcrevidx + _rbcrecsize
diff --git a/tests/test-branches.t b/tests/test-branches.t
--- a/tests/test-branches.t
+++ b/tests/test-branches.t
@@ -629,4 +629,25 @@  situation where the cache is out of sync
   $ f --size .hg/cache/rbc-revs*
   .hg/cache/rbc-revs-v1: size=112

+cache is rebuilt when corruption is detected
+  $ echo > .hg/cache/rbc-names-v1
+  $ hg log -r '5:&branch(.)' -T '{rev} ' --debug
+  rebuilding corrupted revision branch cache
+  8 9 10 11 12 13 truncating cache/rbc-revs-v1 to 40
+  $ f --size --hexdump .hg/cache/rbc-*
+  .hg/cache/rbc-names-v1: size=79
+  0000: 62 00 61 00 63 00 61 20 62 72 61 6e 63 68 20 6e |b.a.c.a branch n|
+  0010: 61 6d 65 20 6d 75 63 68 20 6c 6f 6e 67 65 72 20 |ame much longer |
+  0020: 74 68 61 6e 20 74 68 65 20 64 65 66 61 75 6c 74 |than the default|
+  0030: 20 6a 75 73 74 69 66 69 63 61 74 69 6f 6e 20 75 | justification u|
+  0040: 73 65 64 20 62 79 20 62 72 61 6e 63 68 65 73    |sed by branches|
+  .hg/cache/rbc-revs-v1: size=112
+  0000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
+  0010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 |................|
+  0020: 00 00 00 00 00 00 00 00 d8 cb c6 1d 00 00 00 01 |................|
+  0030: 58 97 36 a2 00 00 00 02 10 ff 58 95 00 00 00 03 |X.6.......X.....|
+  0040: ee bb 94 44 00 00 00 00 5f 40 61 bb 00 00 00 00 |...D...._@a.....|
+  0050: bf be 84 1b 00 00 00 00 d3 f1 63 45 80 00 00 00 |..........cE....|
+  0060: e3 d4 9c 05 80 00 00 00 e2 3b 55 05 00 00 00 00 |.........;U.....|
+
   $ cd ..