Submitter | Mads Kiilerich |
---|---|
Date | Dec. 14, 2014, 6:34 p.m. |
Message ID | <a51d095337085bc74398.1418582065@ssl.google-analytics.com> |
Download | mbox | patch |
Permalink | /patch/7092/ |
State | Superseded |
Commit | 678f53865c6860a950392691814766957ee89316 |
Headers | show |
Comments
On 12/14/2014 10:34 AM, Mads Kiilerich wrote: > # HG changeset patch > # User Mads Kiilerich <madski@unity3d.com> > # Date 1418581984 -3600 > # Sun Dec 14 19:33:04 2014 +0100 > # Node ID a51d095337085bc74398b39a3b0d2c6e43e3cb80 > # Parent 186394f5dbf37adf268fa034fc897201fd2dbe0d > revset: use localrepo revbranchcache for branch name filtering > > Branch name filtering in revsets was expensive. For every rev it created a > changectx and called .branch(). This patch is doing two differents things: - updating the 'branch(xxx)' revset - updating the 'sort('branch', xxx) revset. I would be more confortable if it was two distincts patches. > > Instead, use the new localrepo revbranchcache. > > perfrevset 'branch(mobile)' on mozilla-central, before: > ! wall 10.904967 comb 10.900000 user 10.860000 sys 0.040000 (best of 3) > After: > ! wall 1.065556 comb 1.060000 user 1.060000 sys 0.000000 (best of 10) > > diff --git a/mercurial/branchmap.py b/mercurial/branchmap.py > --- a/mercurial/branchmap.py > +++ b/mercurial/branchmap.py > @@ -381,6 +381,10 @@ > self._dirty = True > return butf8, close > > + def branchlocal(self, rev): > + """Return branch name of rev in local encoding.""" > + return encoding.tolocal(self.branchinfoutf8(rev)[0]) > + > def save(self): > """Save branch cache if it is dirty.""" > if self._dirty: > diff --git a/mercurial/revset.py b/mercurial/revset.py > --- a/mercurial/revset.py > +++ b/mercurial/revset.py > @@ -452,6 +452,7 @@ > a regular expression. To match a branch that actually starts with `re:`, > use the prefix `literal:`. > """ > + branch = repo.revbranchcache.branchlocal What converting the revset args to utf8 and patching that agains the cache content. It would save a lot on translation during the loop. > try: > b = getstring(x, '') > except error.ParseError: > @@ -463,16 +464,16 @@ > # note: falls through to the revspec case if no branch with > # this name exists > if pattern in repo.branchmap(): > - return subset.filter(lambda r: matcher(repo[r].branch())) > + return subset.filter(lambda r: matcher(branch(r))) > else: > - return subset.filter(lambda r: matcher(repo[r].branch())) > + return subset.filter(lambda r: matcher(branch(r))) > > s = getset(repo, spanset(repo), x) > b = set() > for r in s: > - b.add(repo[r].branch()) > + b.add(branch(r)) > c = s.__contains__ > - return subset.filter(lambda r: c(r) or repo[r].branch() in b) > + return subset.filter(lambda r: c(r) or branch(r) in b) > > def bumped(repo, subset, x): > """``bumped()`` > @@ -1431,7 +1432,7 @@ > getfieldfuncs = [] > _funcs = { > 'user': lambda r: repo[r].user(), > - 'branch': lambda r: repo[r].branch(), > + 'branch': repo.revbranchcache.branchlocal, > 'date': lambda r: repo[r].date(), > 'description': lambda r: repo[r].description(), > 'files': lambda r: repo[r].files(), > @@ -1534,9 +1535,9 @@ > elif k == '-rev': > e.append(-r) > elif k == 'branch': > - e.append(c.branch()) > + e.append(repo.revbranchcache.branchlocal(r)) > elif k == '-branch': > - e.append(invert(c.branch())) > + e.append(invert(repo.revbranchcache.branchlocal(r))) > elif k == 'desc': > e.append(c.description()) > elif k == '-desc': > diff --git a/tests/test-branches.t b/tests/test-branches.t > --- a/tests/test-branches.t > +++ b/tests/test-branches.t > @@ -520,4 +520,75 @@ > } > ] > > +revision branch name caching implementation > + > +cache creation > + $ rm .hg/cache/revbranchnames > + $ hg debugrevspec 'branch("re:a ")' > + 7 > + $ "$TESTDIR/md5sum.py" .hg/cache/revbranchnames > + b90894765458b0298f0c822fb5193c1e .hg/cache/revbranchnames > +recovery from invalid cache file > + $ echo > .hg/cache/revbranchnames > + $ hg debugrevspec 'branch("re:a ")' > + 7 > + $ "$TESTDIR/md5sum.py" .hg/cache/revbranchnames > + b90894765458b0298f0c822fb5193c1e .hg/cache/revbranchnames > +recovery from other corruption - extra trailing data > + $ echo >> .hg/cache/revbranchnames > + $ hg debugrevspec 'branch("re:a ")' > + 7 > + $ "$TESTDIR/md5sum.py" .hg/cache/revbranchnames > + b90894765458b0298f0c822fb5193c1e .hg/cache/revbranchnames > +lazy update after commit > + $ hg tag tag > + $ "$TESTDIR/md5sum.py" .hg/cache/revbranchnames > + 807c628e9f3f9040cc1da7124c4cb2d8 .hg/cache/revbranchnames > + $ hg debugrevspec 'branch("re:a ")' > + 7 > + $ "$TESTDIR/md5sum.py" .hg/cache/revbranchnames > + 807c628e9f3f9040cc1da7124c4cb2d8 .hg/cache/revbranchnames > +update after rollback - cache keeps stripped revs until written for other reasons > + $ hg up -qr '.^' > + $ hg rollback -qf > + $ "$TESTDIR/md5sum.py" .hg/cache/revbranchnames > + 807c628e9f3f9040cc1da7124c4cb2d8 .hg/cache/revbranchnames > + $ hg debugrevspec 'branch("re:a ")' > + 7 > + $ "$TESTDIR/md5sum.py" .hg/cache/revbranchnames > + 807c628e9f3f9040cc1da7124c4cb2d8 .hg/cache/revbranchnames > +handle history mutations that doesn't change the tip node - this is a problem > +with the cache invalidation scheme used by branchmap > + $ hg log -r tip+b -T'{rev}:{node|short} {branch}\n' > + 14:f894c25619d3 c > + 13:e23b5505d1ad b > + $ hg bundle -q --all bu.hg > + $ hg --config extensions.strip= strip --no-b -qr -1: > + $ hg up -q tip > + $ hg branch > + b > + $ hg branch -q hacked > + $ hg ci --amend -qm 'hacked' > + $ hg pull -q bu.hg -r f894c25619d3 > + $ hg log -r tip+b -T'{rev}:{node|short} {branch}\n' > + 14:f894c25619d3 c > + 12:e3d49c0575d8 b > + $ hg debugrevspec 'branch("hacked")' > + 13 > + $ "$TESTDIR/md5sum.py" .hg/cache/revbranchnames > + b4401727ffd84a7c339faf8a6e470671 .hg/cache/revbranchnames > +cleanup, restore old state > + $ hg --config extensions.strip= strip --no-b -qr -2: > + $ hg pull -q bu.hg > + $ rm bu.hg > + $ hg up -qr tip > + $ hg log -r tip -T'{rev}:{node|short}\n' > + 14:f894c25619d3 > +the cache file do not go back to the old state - it still contains the > +now unused 'hacked' branch name) > + $ hg debugrevspec 'branch("re:a ")' > + 7 > + $ "$TESTDIR/md5sum.py" .hg/cache/revbranchnames > + e1a76da810555ed88e3b2f467395e2ff .hg/cache/revbranchnames > + > $ cd .. > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@selenic.com > http://selenic.com/mailman/listinfo/mercurial-devel >
Patch
diff --git a/mercurial/branchmap.py b/mercurial/branchmap.py --- a/mercurial/branchmap.py +++ b/mercurial/branchmap.py @@ -381,6 +381,10 @@ self._dirty = True return butf8, close + def branchlocal(self, rev): + """Return branch name of rev in local encoding.""" + return encoding.tolocal(self.branchinfoutf8(rev)[0]) + def save(self): """Save branch cache if it is dirty.""" if self._dirty: diff --git a/mercurial/revset.py b/mercurial/revset.py --- a/mercurial/revset.py +++ b/mercurial/revset.py @@ -452,6 +452,7 @@ a regular expression. To match a branch that actually starts with `re:`, use the prefix `literal:`. """ + branch = repo.revbranchcache.branchlocal try: b = getstring(x, '') except error.ParseError: @@ -463,16 +464,16 @@ # note: falls through to the revspec case if no branch with # this name exists if pattern in repo.branchmap(): - return subset.filter(lambda r: matcher(repo[r].branch())) + return subset.filter(lambda r: matcher(branch(r))) else: - return subset.filter(lambda r: matcher(repo[r].branch())) + return subset.filter(lambda r: matcher(branch(r))) s = getset(repo, spanset(repo), x) b = set() for r in s: - b.add(repo[r].branch()) + b.add(branch(r)) c = s.__contains__ - return subset.filter(lambda r: c(r) or repo[r].branch() in b) + return subset.filter(lambda r: c(r) or branch(r) in b) def bumped(repo, subset, x): """``bumped()`` @@ -1431,7 +1432,7 @@ getfieldfuncs = [] _funcs = { 'user': lambda r: repo[r].user(), - 'branch': lambda r: repo[r].branch(), + 'branch': repo.revbranchcache.branchlocal, 'date': lambda r: repo[r].date(), 'description': lambda r: repo[r].description(), 'files': lambda r: repo[r].files(), @@ -1534,9 +1535,9 @@ elif k == '-rev': e.append(-r) elif k == 'branch': - e.append(c.branch()) + e.append(repo.revbranchcache.branchlocal(r)) elif k == '-branch': - e.append(invert(c.branch())) + e.append(invert(repo.revbranchcache.branchlocal(r))) elif k == 'desc': e.append(c.description()) elif k == '-desc': diff --git a/tests/test-branches.t b/tests/test-branches.t --- a/tests/test-branches.t +++ b/tests/test-branches.t @@ -520,4 +520,75 @@ } ] +revision branch name caching implementation + +cache creation + $ rm .hg/cache/revbranchnames + $ hg debugrevspec 'branch("re:a ")' + 7 + $ "$TESTDIR/md5sum.py" .hg/cache/revbranchnames + b90894765458b0298f0c822fb5193c1e .hg/cache/revbranchnames +recovery from invalid cache file + $ echo > .hg/cache/revbranchnames + $ hg debugrevspec 'branch("re:a ")' + 7 + $ "$TESTDIR/md5sum.py" .hg/cache/revbranchnames + b90894765458b0298f0c822fb5193c1e .hg/cache/revbranchnames +recovery from other corruption - extra trailing data + $ echo >> .hg/cache/revbranchnames + $ hg debugrevspec 'branch("re:a ")' + 7 + $ "$TESTDIR/md5sum.py" .hg/cache/revbranchnames + b90894765458b0298f0c822fb5193c1e .hg/cache/revbranchnames +lazy update after commit + $ hg tag tag + $ "$TESTDIR/md5sum.py" .hg/cache/revbranchnames + 807c628e9f3f9040cc1da7124c4cb2d8 .hg/cache/revbranchnames + $ hg debugrevspec 'branch("re:a ")' + 7 + $ "$TESTDIR/md5sum.py" .hg/cache/revbranchnames + 807c628e9f3f9040cc1da7124c4cb2d8 .hg/cache/revbranchnames +update after rollback - cache keeps stripped revs until written for other reasons + $ hg up -qr '.^' + $ hg rollback -qf + $ "$TESTDIR/md5sum.py" .hg/cache/revbranchnames + 807c628e9f3f9040cc1da7124c4cb2d8 .hg/cache/revbranchnames + $ hg debugrevspec 'branch("re:a ")' + 7 + $ "$TESTDIR/md5sum.py" .hg/cache/revbranchnames + 807c628e9f3f9040cc1da7124c4cb2d8 .hg/cache/revbranchnames +handle history mutations that doesn't change the tip node - this is a problem +with the cache invalidation scheme used by branchmap + $ hg log -r tip+b -T'{rev}:{node|short} {branch}\n' + 14:f894c25619d3 c + 13:e23b5505d1ad b + $ hg bundle -q --all bu.hg + $ hg --config extensions.strip= strip --no-b -qr -1: + $ hg up -q tip + $ hg branch + b + $ hg branch -q hacked + $ hg ci --amend -qm 'hacked' + $ hg pull -q bu.hg -r f894c25619d3 + $ hg log -r tip+b -T'{rev}:{node|short} {branch}\n' + 14:f894c25619d3 c + 12:e3d49c0575d8 b + $ hg debugrevspec 'branch("hacked")' + 13 + $ "$TESTDIR/md5sum.py" .hg/cache/revbranchnames + b4401727ffd84a7c339faf8a6e470671 .hg/cache/revbranchnames +cleanup, restore old state + $ hg --config extensions.strip= strip --no-b -qr -2: + $ hg pull -q bu.hg + $ rm bu.hg + $ hg up -qr tip + $ hg log -r tip -T'{rev}:{node|short}\n' + 14:f894c25619d3 +the cache file do not go back to the old state - it still contains the +now unused 'hacked' branch name) + $ hg debugrevspec 'branch("re:a ")' + 7 + $ "$TESTDIR/md5sum.py" .hg/cache/revbranchnames + e1a76da810555ed88e3b2f467395e2ff .hg/cache/revbranchnames + $ cd ..