Submitter | Mads Kiilerich |
---|---|
Date | Oct. 16, 2014, 1:48 a.m. |
Message ID | <b0e40604be81ff0716e1.1413424104@ssl.google-analytics.com> |
Download | mbox | patch |
Permalink | /patch/6312/ |
State | Superseded |
Headers | show |
Comments
On 10/15/2014 06:48 PM, Mads Kiilerich wrote: > # HG changeset patch > # User Mads Kiilerich <madski@unity3d.com> > # Date 1413424007 -7200 > # Thu Oct 16 03:46:47 2014 +0200 > # Node ID b0e40604be81ff0716e1254f7ce7884599457f71 > # Parent d87009e720063a8d6d80afdbe6bb9323e2849030 > revset: use localrepo persistent branch name caching > > Branch name filtering in revsets was expensive. For every rev it created a > changectx and called .branch(). (Using changelog.branchinfo() would make it a > bit faster.) > > Instead, use the new localrepo caching branch lookup method. Could you get ctx.branch() to use this cache too ? Would that benefit anything? The branchmap code could use it too. and would prewarm it for you. > > On the small hg repo: > hg log --time -r 'branch(stable) & branch(default)' > Before: > time: real 1.910 secs (user 1.890+0.000 sys 0.020+0.000) > After: > time: real 1.240 secs (user 1.230+0.000 sys 0.010+0.000) > time: real 0.120 secs (user 0.110+0.000 sys 0.000+0.000) > > On mozilla-central with 210557:a280a03c9f3c : > hg --time log -r 'branch(mobile)' -T. > Before: > time: real 10.450 secs (user 10.390+0.000 sys 0.060+0.000) > After: > time: real 7.640 secs (user 7.480+0.000 sys 0.140+0.000) > time: real 0.520 secs (user 0.490+0.000 sys 0.030+0.000) > > First run is 35%/27% faster (primarily because the new code path uses > changelog.branchinfo instead changectx.branch and we avoid messing with > localrepo). Following runs will use the cache and are 16x/20x faster. If you are benchmarking revset please use the perf extension to get your numbers. > > diff --git a/mercurial/revset.py b/mercurial/revset.py > --- a/mercurial/revset.py > +++ b/mercurial/revset.py > @@ -478,6 +478,7 @@ def branch(repo, subset, x): > a regular expression. To match a branch that actually starts with `re:`, > use the prefix `literal:`. > """ > + branch = repo.revbranchcache.branch > try: > b = getstring(x, '') > except error.ParseError: > @@ -489,16 +490,16 @@ def branch(repo, subset, x): > # note: falls through to the revspec case if no branch with > # this name exists > if pattern in repo.branchmap(): > - return subset.filter(lambda r: matcher(repo[r].branch())) > + return subset.filter(lambda r: matcher(branch(r))) > else: > - return subset.filter(lambda r: matcher(repo[r].branch())) > + return subset.filter(lambda r: matcher(branch(r))) > > s = getset(repo, spanset(repo), x) > b = set() > for r in s: > - b.add(repo[r].branch()) > + b.add(branch(r)) > c = s.__contains__ > - return subset.filter(lambda r: c(r) or repo[r].branch() in b) > + return subset.filter(lambda r: c(r) or branch(r) in b) > > def bumped(repo, subset, x): > """``bumped()`` > @@ -1431,7 +1432,7 @@ def matching(repo, subset, x): > getfieldfuncs = [] > _funcs = { > 'user': lambda r: repo[r].user(), > - 'branch': lambda r: repo[r].branch(), > + 'branch': repo.revbranchcache.branch, > 'date': lambda r: repo[r].date(), > 'description': lambda r: repo[r].description(), > 'files': lambda r: repo[r].files(), > @@ -1532,9 +1533,9 @@ def sort(repo, subset, x): > elif k == '-rev': > e.append(-r) > elif k == 'branch': > - e.append(c.branch()) > + e.append(repo.revbranchcache.branch(r)) > elif k == '-branch': > - e.append(invert(c.branch())) > + e.append(invert(repo.revbranchcache.branch(r))) > elif k == 'desc': > e.append(c.description()) > elif k == '-desc': > diff --git a/tests/test-revset.t b/tests/test-revset.t > --- a/tests/test-revset.t > +++ b/tests/test-revset.t > @@ -744,6 +744,64 @@ issue2437 > $ log 'ancestors(8) and (heads(branch("-a-b-c-")) or heads(branch(é)))' > 4 > > +branchname caching Test for branch name cachine should not be in the revset test. They should be in some branch related file. test-revset.t should be revset behavior themself. not internal implementation details. > + cache creation > + $ rm .hg/cache/branchnames > + $ log 'branch("-a-b-c-")' > + 4 > + $ "$TESTDIR/md5sum.py" .hg/cache/branchnames > + c31b7e7bdc4e1f69133685cf82550eac .hg/cache/branchnames > + handling of invalid cache file > + $ echo > .hg/cache/branchnames > + $ log 'branch("-a-b-c-")' > + 4 > + $ "$TESTDIR/md5sum.py" .hg/cache/branchnames > + c31b7e7bdc4e1f69133685cf82550eac .hg/cache/branchnames > + handling of extra trailing data > + $ echo >> .hg/cache/branchnames > + $ log 'branch("-a-b-c-")' > + 4 > + $ "$TESTDIR/md5sum.py" .hg/cache/branchnames > + c31b7e7bdc4e1f69133685cf82550eac .hg/cache/branchnames > + update after commit > + $ hg tag tag > + $ "$TESTDIR/md5sum.py" .hg/cache/branchnames > + c31b7e7bdc4e1f69133685cf82550eac .hg/cache/branchnames > + $ log 'branch("-a-b-c-")' > + 4 > + $ "$TESTDIR/md5sum.py" .hg/cache/branchnames > + 77d75afbf592f045b609c3a68986a908 .hg/cache/branchnames > + $ update after rollback > + $ hg up -qr '.^' > + $ hg rollback -qf > + $ "$TESTDIR/md5sum.py" .hg/cache/branchnames > + 77d75afbf592f045b609c3a68986a908 .hg/cache/branchnames > + $ log 'branch("-a-b-c-")' > + 4 > + $ "$TESTDIR/md5sum.py" .hg/cache/branchnames > + c31b7e7bdc4e1f69133685cf82550eac .hg/cache/branchnames > + handle non-tip node change (could be a problem with some cache invalidation schemes) > + $ hg bundle -q --all bu.hg > + $ hg --config extensions.strip= strip --no-b -qr 8 > + $ hg up -q 7 > + $ hg branch -q hacked > + $ hg ci --amend -qm 'hacked' > + $ hg pull -q bu.hg -r 24286f4ae135 > + $ log 'branch("hacked")' > + 7 > + $ "$TESTDIR/md5sum.py" .hg/cache/branchnames > + 7e7547be03cd3badb5053899aea5c532 .hg/cache/branchnames > + $ hg --config extensions.strip= strip --no-b -qr 7: > + $ hg pull -q bu.hg > + $ rm bu.hg > + $ hg up -qr tip > + $ log 'branch("-a-b-c-")' > + 4 > + $ "$TESTDIR/md5sum.py" .hg/cache/branchnames > + 753e486275e544c562da095fd116a441 .hg/cache/branchnames > +(the cache file still contains the now unused 'hacked' branch name) > + > issue2654: report a parse error if the revset was not completely parsed > > $ log '1 OR 2' > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@selenic.com > http://selenic.com/mailman/listinfo/mercurial-devel >
On 10/16/2014 09:49 AM, Pierre-Yves David wrote: > On 10/15/2014 06:48 PM, Mads Kiilerich wrote: >> # HG changeset patch >> # User Mads Kiilerich <madski@unity3d.com> >> # Date 1413424007 -7200 >> # Thu Oct 16 03:46:47 2014 +0200 >> # Node ID b0e40604be81ff0716e1254f7ce7884599457f71 >> # Parent d87009e720063a8d6d80afdbe6bb9323e2849030 >> revset: use localrepo persistent branch name caching >> >> Branch name filtering in revsets was expensive. For every rev it >> created a >> changectx and called .branch(). (Using changelog.branchinfo() would >> make it a >> bit faster.) >> >> Instead, use the new localrepo caching branch lookup method. > > Could you get ctx.branch() to use this cache too ? Would that benefit > anything? I doubt there will be much to win if we already have a ctx. The cache will also grow with the repo and is not free to load. We do not want to load it just to make a single branch name lookup. But it will probably always be worth it for revsets. > The branchmap code could use it too. and would prewarm it for you. Sure. It would also require storing the closed flag in the cache, in a way that doesn't cause regressions for regular branch name lookups. That is something that can come later. > >> >> On the small hg repo: >> hg log --time -r 'branch(stable) & branch(default)' >> Before: >> time: real 1.910 secs (user 1.890+0.000 sys 0.020+0.000) >> After: >> time: real 1.240 secs (user 1.230+0.000 sys 0.010+0.000) >> time: real 0.120 secs (user 0.110+0.000 sys 0.000+0.000) >> >> On mozilla-central with 210557:a280a03c9f3c : >> hg --time log -r 'branch(mobile)' -T. >> Before: >> time: real 10.450 secs (user 10.390+0.000 sys 0.060+0.000) >> After: >> time: real 7.640 secs (user 7.480+0.000 sys 0.140+0.000) >> time: real 0.520 secs (user 0.490+0.000 sys 0.030+0.000) >> >> First run is 35%/27% faster (primarily because the new code path uses >> changelog.branchinfo instead changectx.branch and we avoid messing with >> localrepo). Following runs will use the cache and are 16x/20x faster. > > If you are benchmarking revset please use the perf extension to get > your numbers. The perf extension is fine for fine tuning. The perf extension will be tricked by the different computations done when populating and using the cache. It will not give any relevant, reliable and comparable values. /Mads
Patch
diff --git a/mercurial/revset.py b/mercurial/revset.py --- a/mercurial/revset.py +++ b/mercurial/revset.py @@ -478,6 +478,7 @@ def branch(repo, subset, x): a regular expression. To match a branch that actually starts with `re:`, use the prefix `literal:`. """ + branch = repo.revbranchcache.branch try: b = getstring(x, '') except error.ParseError: @@ -489,16 +490,16 @@ def branch(repo, subset, x): # note: falls through to the revspec case if no branch with # this name exists if pattern in repo.branchmap(): - return subset.filter(lambda r: matcher(repo[r].branch())) + return subset.filter(lambda r: matcher(branch(r))) else: - return subset.filter(lambda r: matcher(repo[r].branch())) + return subset.filter(lambda r: matcher(branch(r))) s = getset(repo, spanset(repo), x) b = set() for r in s: - b.add(repo[r].branch()) + b.add(branch(r)) c = s.__contains__ - return subset.filter(lambda r: c(r) or repo[r].branch() in b) + return subset.filter(lambda r: c(r) or branch(r) in b) def bumped(repo, subset, x): """``bumped()`` @@ -1431,7 +1432,7 @@ def matching(repo, subset, x): getfieldfuncs = [] _funcs = { 'user': lambda r: repo[r].user(), - 'branch': lambda r: repo[r].branch(), + 'branch': repo.revbranchcache.branch, 'date': lambda r: repo[r].date(), 'description': lambda r: repo[r].description(), 'files': lambda r: repo[r].files(), @@ -1532,9 +1533,9 @@ def sort(repo, subset, x): elif k == '-rev': e.append(-r) elif k == 'branch': - e.append(c.branch()) + e.append(repo.revbranchcache.branch(r)) elif k == '-branch': - e.append(invert(c.branch())) + e.append(invert(repo.revbranchcache.branch(r))) elif k == 'desc': e.append(c.description()) elif k == '-desc': diff --git a/tests/test-revset.t b/tests/test-revset.t --- a/tests/test-revset.t +++ b/tests/test-revset.t @@ -744,6 +744,64 @@ issue2437 $ log 'ancestors(8) and (heads(branch("-a-b-c-")) or heads(branch(é)))' 4 +branchname caching + + cache creation + $ rm .hg/cache/branchnames + $ log 'branch("-a-b-c-")' + 4 + $ "$TESTDIR/md5sum.py" .hg/cache/branchnames + c31b7e7bdc4e1f69133685cf82550eac .hg/cache/branchnames + handling of invalid cache file + $ echo > .hg/cache/branchnames + $ log 'branch("-a-b-c-")' + 4 + $ "$TESTDIR/md5sum.py" .hg/cache/branchnames + c31b7e7bdc4e1f69133685cf82550eac .hg/cache/branchnames + handling of extra trailing data + $ echo >> .hg/cache/branchnames + $ log 'branch("-a-b-c-")' + 4 + $ "$TESTDIR/md5sum.py" .hg/cache/branchnames + c31b7e7bdc4e1f69133685cf82550eac .hg/cache/branchnames + update after commit + $ hg tag tag + $ "$TESTDIR/md5sum.py" .hg/cache/branchnames + c31b7e7bdc4e1f69133685cf82550eac .hg/cache/branchnames + $ log 'branch("-a-b-c-")' + 4 + $ "$TESTDIR/md5sum.py" .hg/cache/branchnames + 77d75afbf592f045b609c3a68986a908 .hg/cache/branchnames + $ update after rollback + $ hg up -qr '.^' + $ hg rollback -qf + $ "$TESTDIR/md5sum.py" .hg/cache/branchnames + 77d75afbf592f045b609c3a68986a908 .hg/cache/branchnames + $ log 'branch("-a-b-c-")' + 4 + $ "$TESTDIR/md5sum.py" .hg/cache/branchnames + c31b7e7bdc4e1f69133685cf82550eac .hg/cache/branchnames + handle non-tip node change (could be a problem with some cache invalidation schemes) + $ hg bundle -q --all bu.hg + $ hg --config extensions.strip= strip --no-b -qr 8 + $ hg up -q 7 + $ hg branch -q hacked + $ hg ci --amend -qm 'hacked' + $ hg pull -q bu.hg -r 24286f4ae135 + $ log 'branch("hacked")' + 7 + $ "$TESTDIR/md5sum.py" .hg/cache/branchnames + 7e7547be03cd3badb5053899aea5c532 .hg/cache/branchnames + $ hg --config extensions.strip= strip --no-b -qr 7: + $ hg pull -q bu.hg + $ rm bu.hg + $ hg up -qr tip + $ log 'branch("-a-b-c-")' + 4 + $ "$TESTDIR/md5sum.py" .hg/cache/branchnames + 753e486275e544c562da095fd116a441 .hg/cache/branchnames +(the cache file still contains the now unused 'hacked' branch name) + issue2654: report a parse error if the revset was not completely parsed $ log '1 OR 2'