Patchwork [02,of,12] branchmap: cache open/closed branch head information

login
register
mail settings
Submitter Brodie Rao
Date Nov. 16, 2013, 2:27 a.m.
Message ID <baa013be987aeea0da0a.1384568854@hit-nxdomain.opendns.com>
Download mbox | patch
Permalink /patch/2960/
State Superseded
Headers show

Comments

Brodie Rao - Nov. 16, 2013, 2:27 a.m.
# HG changeset patch
# User Brodie Rao <brodie@sf.io>
# Date 1379318909 25200
#      Mon Sep 16 01:08:29 2013 -0700
# Node ID baa013be987aeea0da0a95f7a55496941c7d4e2c
# Parent  1e160d0fd711f4a709fe033a122c867d299a8afc
branchmap: cache open/closed branch head information

This lets us determine the open/closed state of a branch without
reading from the changelog (which can be costly over NFS and/or with
many branches).
Pierre-Yves David - Nov. 16, 2013, 3:38 a.m.
On 11/15/2013 09:27 PM, Brodie Rao wrote:
> # HG changeset patch
> # User Brodie Rao <brodie@sf.io>
> # Date 1379318909 25200
> #      Mon Sep 16 01:08:29 2013 -0700
> # Node ID baa013be987aeea0da0a95f7a55496941c7d4e2c
> # Parent  1e160d0fd711f4a709fe033a122c867d299a8afc
> branchmap: cache open/closed branch head information
>
> This lets us determine the open/closed state of a branch without
> reading from the changelog (which can be costly over NFS and/or with
> many branches).

This patch introduces a new cache file with a new format, but I can't
locate any documentation about this new format.

We really want some inline documentation about the format and some
details about how it impacts the code (the patch touches quite a lot of code).

Having this new cache is awesome news, however!
Brodie Rao - Nov. 16, 2013, 3:48 a.m.
On Fri, Nov 15, 2013 at 10:38 PM, Pierre-Yves David
<pierre-yves.david@ens-lyon.org> wrote:
> On 11/15/2013 09:27 PM, Brodie Rao wrote:
>>
>> # HG changeset patch
>> # User Brodie Rao <brodie@sf.io>
>> # Date 1379318909 25200
>> #      Mon Sep 16 01:08:29 2013 -0700
>> # Node ID baa013be987aeea0da0a95f7a55496941c7d4e2c
>> # Parent  1e160d0fd711f4a709fe033a122c867d299a8afc
>> branchmap: cache open/closed branch head information
>>
>> This lets us determine the open/closed state of a branch without
>> reading from the changelog (which can be costly over NFS and/or with
>> many branches).
>
>
> This patch introduces a new cache file with a new format, but I can't locate
> any documentation about this new format.
>
> We really want some inline documentation about the format and some details
> about how it impacts the code (the patch touches quite a lot of code).

I'll add a new patch to the beginning of the series that adds
documentation for the existing format, and I'll update this patch so
it updates that documentation.

I'll send a V2 series in a bit.

> Having this new cache is awesome news, however!
>
> --
> Pierre-Yves David

Patch

diff --git a/mercurial/branchmap.py b/mercurial/branchmap.py
--- a/mercurial/branchmap.py
+++ b/mercurial/branchmap.py
@@ -11,7 +11,7 @@  import util, repoview
 
 def _filename(repo):
     """name of a branchcache file for a given repo or repoview"""
-    filename = "cache/branchheads"
+    filename = "cache/branch2"
     if repo.filtername:
         filename = '%s-%s' % (filename, repo.filtername)
     return filename
@@ -39,11 +39,16 @@  def read(repo):
         for l in lines:
             if not l:
                 continue
-            node, label = l.split(" ", 1)
+            node, state, label = l.split(" ", 2)
+            if state not in 'oc':
+                raise ValueError('invalid branch state')
             label = encoding.tolocal(label.strip())
             if not node in repo:
                 raise ValueError('node %s does not exist' % node)
-            partial.setdefault(label, []).append(bin(node))
+            node = bin(node)
+            partial.setdefault(label, []).append(node)
+            if state == 'c':
+                partial._closednodes.add(node)
     except KeyboardInterrupt:
         raise
     except Exception, inst:
@@ -86,11 +91,18 @@  class branchcache(dict):
     """A dict like object that hold branches heads cache"""
 
     def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
-                 filteredhash=None):
+                 filteredhash=None, closednodes=None):
         super(branchcache, self).__init__(entries)
         self.tipnode = tipnode
         self.tiprev = tiprev
         self.filteredhash = filteredhash
+        # closednodes is a set of nodes that close their branch. If the branch
+        # cache has been updated, it may contain nodes that are no longer
+        # heads.
+        if closednodes is None:
+            self._closednodes = set()
+        else:
+            self._closednodes = closednodes
 
     def _hashfiltered(self, repo):
         """build hash of revision filtered in the current cache
@@ -126,7 +138,8 @@  class branchcache(dict):
 
     def copy(self):
         """return an deep copy of the branchcache object"""
-        return branchcache(self, self.tipnode, self.tiprev, self.filteredhash)
+        return branchcache(self, self.tipnode, self.tiprev, self.filteredhash,
+                           self._closednodes)
 
     def write(self, repo):
         try:
@@ -137,7 +150,12 @@  class branchcache(dict):
             f.write(" ".join(cachekey) + '\n')
             for label, nodes in sorted(self.iteritems()):
                 for node in nodes:
-                    f.write("%s %s\n" % (hex(node), encoding.fromlocal(label)))
+                    if node in self._closednodes:
+                        state = 'c'
+                    else:
+                        state = 'o'
+                    f.write("%s %s %s\n" % (hex(node), state,
+                                            encoding.fromlocal(label)))
             f.close()
         except (IOError, OSError, util.Abort):
             # Abort may be raise by read only opener
@@ -151,9 +169,13 @@  class branchcache(dict):
         cl = repo.changelog
         # collect new branch entries
         newbranches = {}
-        getbranch = cl.branch
+        getbranchinfo = cl.branchinfo
         for r in revgen:
-            newbranches.setdefault(getbranch(r), []).append(cl.node(r))
+            branch, closesbranch = getbranchinfo(r)
+            node = cl.node(r)
+            newbranches.setdefault(branch, []).append(node)
+            if closesbranch:
+                self._closednodes.add(node)
         # if older branchheads are reachable from new ones, they aren't
         # really branchheads. Note checking parents is insufficient:
         # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
diff --git a/mercurial/changelog.py b/mercurial/changelog.py
--- a/mercurial/changelog.py
+++ b/mercurial/changelog.py
@@ -342,9 +342,10 @@  class changelog(revlog.revlog):
         text = "\n".join(l)
         return self.addrevision(text, transaction, len(self), p1, p2)
 
-    def branch(self, rev):
-        """return the branch of a revision
+    def branchinfo(self, rev):
+        """return the branch name and open/close state of a revision
 
         This function exists because creating a changectx object
         just to access this is costly."""
-        return encoding.tolocal(self.read(rev)[5].get("branch"))
+        extra = self.read(rev)[5]
+        return encoding.tolocal(extra.get("branch")), 'close' in extra
diff --git a/mercurial/hg.py b/mercurial/hg.py
--- a/mercurial/hg.py
+++ b/mercurial/hg.py
@@ -337,8 +337,8 @@  def clone(ui, peeropts, source, dest=Non
             # Recomputing branch cache might be slow on big repos,
             # so just copy it
             dstcachedir = os.path.join(destpath, 'cache')
-            srcbranchcache = srcrepo.sjoin('cache/branchheads')
-            dstbranchcache = os.path.join(dstcachedir, 'branchheads')
+            srcbranchcache = srcrepo.sjoin('cache/branch2')
+            dstbranchcache = os.path.join(dstcachedir, 'branch2')
             if os.path.exists(srcbranchcache):
                 if not os.path.exists(dstcachedir):
                     os.mkdir(dstcachedir)
diff --git a/tests/test-fncache.t b/tests/test-fncache.t
--- a/tests/test-fncache.t
+++ b/tests/test-fncache.t
@@ -70,7 +70,7 @@  Non store repo:
   .hg/00changelog.i
   .hg/00manifest.i
   .hg/cache
-  .hg/cache/branchheads-served
+  .hg/cache/branch2-served
   .hg/data
   .hg/data/tst.d.hg
   .hg/data/tst.d.hg/foo.i
@@ -98,7 +98,7 @@  Non fncache repo:
   .hg
   .hg/00changelog.i
   .hg/cache
-  .hg/cache/branchheads-served
+  .hg/cache/branch2-served
   .hg/dirstate
   .hg/last-message.txt
   .hg/requires
diff --git a/tests/test-hardlinks.t b/tests/test-hardlinks.t
--- a/tests/test-hardlinks.t
+++ b/tests/test-hardlinks.t
@@ -196,7 +196,7 @@  r4 has hardlinks in the working dir (not
   $ nlinksdir r4
   2 r4/.hg/00changelog.i
   2 r4/.hg/branch
-  2 r4/.hg/cache/branchheads-served
+  2 r4/.hg/cache/branch2-served
   2 r4/.hg/dirstate
   2 r4/.hg/hgrc
   2 r4/.hg/last-message.txt
@@ -226,7 +226,7 @@  Update back to revision 11 in r4 should 
   $ nlinksdir r4
   2 r4/.hg/00changelog.i
   1 r4/.hg/branch
-  2 r4/.hg/cache/branchheads-served
+  2 r4/.hg/cache/branch2-served
   1 r4/.hg/dirstate
   2 r4/.hg/hgrc
   2 r4/.hg/last-message.txt
diff --git a/tests/test-inherit-mode.t b/tests/test-inherit-mode.t
--- a/tests/test-inherit-mode.t
+++ b/tests/test-inherit-mode.t
@@ -66,7 +66,7 @@  new directories are setgid
   00700 ./.hg/
   00600 ./.hg/00changelog.i
   00770 ./.hg/cache/
-  00660 ./.hg/cache/branchheads-served
+  00660 ./.hg/cache/branch2-served
   00660 ./.hg/dirstate
   00660 ./.hg/last-message.txt
   00600 ./.hg/requires
@@ -111,7 +111,7 @@  group can still write everything
   00770 ../push/.hg/
   00660 ../push/.hg/00changelog.i
   00770 ../push/.hg/cache/
-  00660 ../push/.hg/cache/branchheads-base
+  00660 ../push/.hg/cache/branch2-base
   00660 ../push/.hg/requires
   00770 ../push/.hg/store/
   00660 ../push/.hg/store/00changelog.i
diff --git a/tests/test-newbranch.t b/tests/test-newbranch.t
--- a/tests/test-newbranch.t
+++ b/tests/test-newbranch.t
@@ -1,13 +1,13 @@ 
-  $ branchcache=.hg/cache/branchheads
+  $ branchcache=.hg/cache/branch2
 
   $ listbranchcaches() {
-  >    for f in .hg/cache/branchheads*;
+  >    for f in .hg/cache/branch2*;
   >       do echo === $f ===;
   >       cat $f;
   >     done;
   > }
   $ purgebranchcaches() {
-  >     rm .hg/cache/branchheads*
+  >     rm .hg/cache/branch2*
   > }
 
   $ hg init t
@@ -158,13 +158,13 @@  Test for invalid branch cache:
   4:adf1a74a7f7b
 
   $ listbranchcaches
-  === .hg/cache/branchheads ===
+  === .hg/cache/branch2 ===
   corrupted
-  === .hg/cache/branchheads-served ===
+  === .hg/cache/branch2-served ===
   adf1a74a7f7b4cd193d12992f5d0d6a004ed21d6 4
-  c21617b13b220988e7a2e26290fbe4325ffa7139 bar
-  1c28f494dae69a2f8fc815059d257eccf3fcfe75 default
-  adf1a74a7f7b4cd193d12992f5d0d6a004ed21d6 foo
+  c21617b13b220988e7a2e26290fbe4325ffa7139 o bar
+  1c28f494dae69a2f8fc815059d257eccf3fcfe75 o default
+  adf1a74a7f7b4cd193d12992f5d0d6a004ed21d6 o foo
 
 Push should update the branch cache:
 
@@ -175,20 +175,20 @@  Pushing just rev 0:
   $ hg push -qr 0 ../target
 
   $ (cd ../target/; listbranchcaches)
-  === .hg/cache/branchheads-base ===
+  === .hg/cache/branch2-base ===
   db01e8ea3388fd3c7c94e1436ea2bd6a53d581c5 0
-  db01e8ea3388fd3c7c94e1436ea2bd6a53d581c5 default
+  db01e8ea3388fd3c7c94e1436ea2bd6a53d581c5 o default
 
 Pushing everything:
 
   $ hg push -qf ../target
 
   $ (cd ../target/; listbranchcaches)
-  === .hg/cache/branchheads-base ===
+  === .hg/cache/branch2-base ===
   adf1a74a7f7b4cd193d12992f5d0d6a004ed21d6 4
-  c21617b13b220988e7a2e26290fbe4325ffa7139 bar
-  1c28f494dae69a2f8fc815059d257eccf3fcfe75 default
-  adf1a74a7f7b4cd193d12992f5d0d6a004ed21d6 foo
+  c21617b13b220988e7a2e26290fbe4325ffa7139 o bar
+  1c28f494dae69a2f8fc815059d257eccf3fcfe75 o default
+  adf1a74a7f7b4cd193d12992f5d0d6a004ed21d6 o foo
 
 Update with no arguments: tipmost revision of the current branch:
 
diff --git a/tests/test-phases.t b/tests/test-phases.t
--- a/tests/test-phases.t
+++ b/tests/test-phases.t
@@ -175,28 +175,28 @@  visible shared between the initial repo 
 
 check that branch cache with "served" filter are properly computed and stored
 
-  $ ls ../push-dest/.hg/cache/branchheads*
-  ../push-dest/.hg/cache/branchheads-served
-  $ cat ../push-dest/.hg/cache/branchheads-served
+  $ ls ../push-dest/.hg/cache/branch2*
+  ../push-dest/.hg/cache/branch2-served
+  $ cat ../push-dest/.hg/cache/branch2-served
   6d6770faffce199f1fddd1cf87f6f026138cf061 6 465891ffab3c47a3c23792f7dc84156e19a90722
-  b3325c91a4d916bcc4cdc83ea3fe4ece46a42f6e default
-  6d6770faffce199f1fddd1cf87f6f026138cf061 default
+  b3325c91a4d916bcc4cdc83ea3fe4ece46a42f6e o default
+  6d6770faffce199f1fddd1cf87f6f026138cf061 o default
   $ hg heads -R ../push-dest --template '{rev}:{node} {phase}\n'  #update visible cache too
   6:6d6770faffce199f1fddd1cf87f6f026138cf061 draft
   5:2713879da13d6eea1ff22b442a5a87cb31a7ce6a secret
   3:b3325c91a4d916bcc4cdc83ea3fe4ece46a42f6e draft
-  $ ls ../push-dest/.hg/cache/branchheads*
-  ../push-dest/.hg/cache/branchheads-served
-  ../push-dest/.hg/cache/branchheads-visible
-  $ cat ../push-dest/.hg/cache/branchheads-served
+  $ ls ../push-dest/.hg/cache/branch2*
+  ../push-dest/.hg/cache/branch2-served
+  ../push-dest/.hg/cache/branch2-visible
+  $ cat ../push-dest/.hg/cache/branch2-served
   6d6770faffce199f1fddd1cf87f6f026138cf061 6 465891ffab3c47a3c23792f7dc84156e19a90722
-  b3325c91a4d916bcc4cdc83ea3fe4ece46a42f6e default
-  6d6770faffce199f1fddd1cf87f6f026138cf061 default
-  $ cat ../push-dest/.hg/cache/branchheads-visible
+  b3325c91a4d916bcc4cdc83ea3fe4ece46a42f6e o default
+  6d6770faffce199f1fddd1cf87f6f026138cf061 o default
+  $ cat ../push-dest/.hg/cache/branch2-visible
   6d6770faffce199f1fddd1cf87f6f026138cf061 6
-  b3325c91a4d916bcc4cdc83ea3fe4ece46a42f6e default
-  2713879da13d6eea1ff22b442a5a87cb31a7ce6a default
-  6d6770faffce199f1fddd1cf87f6f026138cf061 default
+  b3325c91a4d916bcc4cdc83ea3fe4ece46a42f6e o default
+  2713879da13d6eea1ff22b442a5a87cb31a7ce6a o default
+  6d6770faffce199f1fddd1cf87f6f026138cf061 o default
 
 
 Restore condition prior extra insertion.
diff --git a/tests/test-rebase-collapse.t b/tests/test-rebase-collapse.t
--- a/tests/test-rebase-collapse.t
+++ b/tests/test-rebase-collapse.t
@@ -275,18 +275,18 @@  We keep it the test this way in case new
   7:c65502d4178782309ce0574c5ae6ee9485a9bafa default
   6:c772a8b2dc17629cec88a19d09c926c4814b12c7 default
 
-  $ cat $TESTTMP/b2/.hg/cache/branchheads-served
+  $ cat $TESTTMP/b2/.hg/cache/branch2-served
   c65502d4178782309ce0574c5ae6ee9485a9bafa 7
-  c772a8b2dc17629cec88a19d09c926c4814b12c7 default
-  c65502d4178782309ce0574c5ae6ee9485a9bafa default
+  c772a8b2dc17629cec88a19d09c926c4814b12c7 o default
+  c65502d4178782309ce0574c5ae6ee9485a9bafa o default
 
   $ hg strip 4
   saved backup bundle to $TESTTMP/b2/.hg/strip-backup/8a5212ebc852-backup.hg (glob)
 
-  $ cat $TESTTMP/b2/.hg/cache/branchheads-served
+  $ cat $TESTTMP/b2/.hg/cache/branch2-served
   c65502d4178782309ce0574c5ae6ee9485a9bafa 4
-  2870ad076e541e714f3c2bc32826b5c6a6e5b040 default
-  c65502d4178782309ce0574c5ae6ee9485a9bafa default
+  2870ad076e541e714f3c2bc32826b5c6a6e5b040 o default
+  c65502d4178782309ce0574c5ae6ee9485a9bafa o default
 
   $ hg heads --template="{rev}:{node} {branch}\n"
   4:c65502d4178782309ce0574c5ae6ee9485a9bafa default