From patchwork Thu Oct 16 14:49:34 2014 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [2,of,2,v5] revset: use localrepo persistent branch name caching From: Mads Kiilerich X-Patchwork-Id: 6328 Message-Id: To: mercurial-devel@selenic.com Date: Thu, 16 Oct 2014 16:49:34 +0200 # HG changeset patch # User Mads Kiilerich # Date 1413470216 -7200 # Thu Oct 16 16:36:56 2014 +0200 # Node ID afd86cc6fd978743e49547abab31a4b7dafb95d0 # Parent efe19367c4809045dece68abded2a3297c16a7a8 revset: use localrepo persistent branch name caching Branch name filtering in revsets was expensive. For every rev it created a changectx and called .branch(). (Using changelog.branchinfo() would make it a bit faster.) Instead, use the new localrepo caching branch lookup method. On the small hg repo: hg log --time -r 'branch(stable) & branch(default)' Before: time: real 1.910 secs (user 1.890+0.000 sys 0.020+0.000) After: time: real 1.240 secs (user 1.230+0.000 sys 0.010+0.000) time: real 0.120 secs (user 0.110+0.000 sys 0.000+0.000) On mozilla-central with 210557:a280a03c9f3c : hg --time log -r 'branch(mobile)' -T. Before: time: real 10.450 secs (user 10.390+0.000 sys 0.060+0.000) After: time: real 7.640 secs (user 7.480+0.000 sys 0.140+0.000) time: real 0.520 secs (user 0.490+0.000 sys 0.030+0.000) First run is 35%/27% faster (primarily because the new code path uses changelog.branchinfo instead of changectx.branch and we avoid messing with localrepo). Following runs will use the cache and are 16x/20x faster. diff --git a/mercurial/revset.py b/mercurial/revset.py --- a/mercurial/revset.py +++ b/mercurial/revset.py @@ -478,6 +478,7 @@ def branch(repo, subset, x): a regular expression. To match a branch that actually starts with `re:`, use the prefix `literal:`. 
""" + branch = repo.revbranchcache.branch try: b = getstring(x, '') except error.ParseError: @@ -489,16 +490,16 @@ def branch(repo, subset, x): # note: falls through to the revspec case if no branch with # this name exists if pattern in repo.branchmap(): - return subset.filter(lambda r: matcher(repo[r].branch())) + return subset.filter(lambda r: matcher(branch(r))) else: - return subset.filter(lambda r: matcher(repo[r].branch())) + return subset.filter(lambda r: matcher(branch(r))) s = getset(repo, spanset(repo), x) b = set() for r in s: - b.add(repo[r].branch()) + b.add(branch(r)) c = s.__contains__ - return subset.filter(lambda r: c(r) or repo[r].branch() in b) + return subset.filter(lambda r: c(r) or branch(r) in b) def bumped(repo, subset, x): """``bumped()`` @@ -1431,7 +1432,7 @@ def matching(repo, subset, x): getfieldfuncs = [] _funcs = { 'user': lambda r: repo[r].user(), - 'branch': lambda r: repo[r].branch(), + 'branch': repo.revbranchcache.branch, 'date': lambda r: repo[r].date(), 'description': lambda r: repo[r].description(), 'files': lambda r: repo[r].files(), @@ -1532,9 +1533,9 @@ def sort(repo, subset, x): elif k == '-rev': e.append(-r) elif k == 'branch': - e.append(c.branch()) + e.append(repo.revbranchcache.branch(r)) elif k == '-branch': - e.append(invert(c.branch())) + e.append(invert(repo.revbranchcache.branch(r))) elif k == 'desc': e.append(c.description()) elif k == '-desc': diff --git a/tests/test-branches.t b/tests/test-branches.t --- a/tests/test-branches.t +++ b/tests/test-branches.t @@ -516,4 +516,71 @@ template output: } ] +revision branch name caching implementation + +cache creation + $ rm .hg/cache/branchnames + $ hg debugrevspec 'max(branch("b"))' + 13 + $ "$TESTDIR/md5sum.py" .hg/cache/branchnames + 2c243ef8a7c2503d205bc6116f04d423 .hg/cache/branchnames +recovery from invalid cache file + $ echo > .hg/cache/branchnames + $ hg debugrevspec 'max(branch("b"))' + 13 + $ "$TESTDIR/md5sum.py" .hg/cache/branchnames + 
2c243ef8a7c2503d205bc6116f04d423 .hg/cache/branchnames +recovery from other corruption - extra trailing data + $ echo >> .hg/cache/branchnames + $ hg debugrevspec 'max(branch("b"))' + 13 + $ "$TESTDIR/md5sum.py" .hg/cache/branchnames + 2c243ef8a7c2503d205bc6116f04d423 .hg/cache/branchnames +lazy update after commit + $ hg tag tag + $ "$TESTDIR/md5sum.py" .hg/cache/branchnames + 2c243ef8a7c2503d205bc6116f04d423 .hg/cache/branchnames + $ hg debugrevspec 'max(branch("b"))' + 15 + $ "$TESTDIR/md5sum.py" .hg/cache/branchnames + e0f1a43e584afd743115a8eeaa38b6b4 .hg/cache/branchnames +update after rollback + $ hg up -qr '.^' + $ hg rollback -qf + $ "$TESTDIR/md5sum.py" .hg/cache/branchnames + e0f1a43e584afd743115a8eeaa38b6b4 .hg/cache/branchnames + $ hg debugrevspec 'max(branch("b"))' + 13 + $ "$TESTDIR/md5sum.py" .hg/cache/branchnames + 2c243ef8a7c2503d205bc6116f04d423 .hg/cache/branchnames +handle history mutations that doesn't change the tip node - this is a problem +with the cache invalidation scheme used by branchmap + $ hg log -r tip -T'{rev}:{node|short}\n' + 14:f894c25619d3 + $ hg bundle -q --all bu.hg + $ hg --config extensions.strip= strip --no-b -qr -1: + $ hg up -q tip + $ hg branch -q hacked + $ hg ci --amend -qm 'hacked' + $ hg pull -q bu.hg -r f894c25619d3 + $ hg log -r tip -T'{rev}:{node|short}\n' + 14:f894c25619d3 + $ hg debugrevspec 'branch("hacked")' + 13 + $ hg debugrevspec 'max(branch("b"))' + 12 + $ "$TESTDIR/md5sum.py" .hg/cache/branchnames + a067ccd62fcf15722b9515842b3ed4d3 .hg/cache/branchnames + $ hg --config extensions.strip= strip --no-b -qr -2: + $ hg pull -q bu.hg + $ rm bu.hg + $ hg up -qr tip + $ hg log -r tip -T'{rev}:{node|short}\n' + 14:f894c25619d3 + $ hg debugrevspec 'max(branch("b"))' + 13 + $ "$TESTDIR/md5sum.py" .hg/cache/branchnames + 55227148664f02d086bc3bf07c23180b .hg/cache/branchnames +(the cache file still contains the now unused 'hacked' branch name) + $ cd ..