From patchwork Tue Feb 24 09:11:54 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [3, of, 3] tags: preserve filtered .hgtags filenodes in tags cache (issue4550) From: Gregory Szorc X-Patchwork-Id: 7825 Message-Id: <7f1904705c29ebe7de38.1424769114@gps-mbp.local> To: mercurial-devel@selenic.com Date: Tue, 24 Feb 2015 01:11:54 -0800 # HG changeset patch # User Gregory Szorc # Date 1424769075 28800 # Tue Feb 24 01:11:15 2015 -0800 # Branch stable # Node ID 7f1904705c29ebe7de3874f2f03c42e261ed1c96 # Parent 7d72752b8da5bb2482e6eac47545a78ed3fff592 tags: preserve filtered .hgtags filenodes in tags cache (issue4550) If the tags cache is populated on an unfiltered repository and later populated on a filtered repository, .hgtags filenode entries for filtered revisions will disappear from the tags cache because the tags cache code currently filters out filenode entries for revisions not known to the current repo object. This behavior results in potentially expensive recalculation of .hgtags filenode values for filtered revisions. For evolution users, who create many hidden changesets and heads, this could result in gradual slowdown, as each hidden head will add overhead to resolving tags on an unfiltered repo. This patch makes the tags cache filtered revision aware. Filenode entries for filtered revisions are preserved during reading and writing. Entries are only dropped from the tags cache if they don't correspond to a head, filtered or otherwise. diff --git a/mercurial/tags.py b/mercurial/tags.py --- a/mercurial/tags.py +++ b/mercurial/tags.py @@ -246,12 +246,15 @@ def _readtagcache(ui, repo): return (None, None, tags, False) if cachefile: cachefile.close() # ignore rest of file - repoheads = repo.heads() + ourheads = repo.heads() + repo = repo.unfiltered() + allheads = repo.heads() + # Case 2 (uncommon): empty repo; get out quickly and don't bother # writing an empty cache. - if repoheads == [nullid]: + if allheads == [nullid]: return ([], {}, {}, False) # Case 3 (uncommon): cache file missing or empty. @@ -268,14 +271,14 @@ def _readtagcache(ui, repo): # exposed". if not len(repo.file('.hgtags')): # No tags have ever been committed, so we can avoid a # potentially expensive search. - return (repoheads, cachefnode, None, True) + return (ourheads, cachefnode, None, True) starttime = time.time() newheads = [head - for head in repoheads + for head in allheads if head not in set(cacheheads)] # Now we have to lookup the .hgtags filenode for every new head. # This is the most expensive part of finding tags, so performance @@ -297,9 +300,9 @@ def _readtagcache(ui, repo): len(cachefnode), len(newheads), duration) # Caller has to iterate over all heads, but can use the filenodes in # cachefnode to get to each .hgtags revision quickly. - return (repoheads, cachefnode, None, True) + return (ourheads, cachefnode, None, True) def _writetagcache(ui, repo, heads, tagfnode, cachetags): try: @@ -309,29 +312,39 @@ def _writetagcache(ui, repo, heads, tagf ui.log('tagscache', 'writing tags cache file with %d heads and %d tags\n', len(heads), len(cachetags)) - realheads = repo.heads() # for sanity checks below + # We want to carry forward tagfnode entries that belong to filtered revs, + # even if they aren't in the explicit list of heads. Since entries in the + # cache must be in descending revlog order, we need to merge the sets + # before writing. + # + # When choosing what filenode entries to write, we must consider both the + # filtered and unfiltered views. Otherwise, valid entries may be dropped. + revs = {} + ourheads = set(repo.heads()) + repo = repo.unfiltered() + unfilteredheads = set(repo.heads()) + allheads = ourheads | unfilteredheads + for head, fnode in tagfnode.items(): + if head not in allheads: + continue + + rev = repo.changelog.rev(head) + revs[rev] = '%s %s' % (hex(head), hex(fnode)) + for head in heads: - # temporary sanity checks; these can probably be removed - # once this code has been in crew for a few weeks - assert head in repo.changelog.nodemap, \ - 'trying to write non-existent node %s to tag cache' % short(head) - assert head in realheads, \ + assert head in allheads, \ 'trying to write non-head %s to tag cache' % short(head) assert head != nullid, \ 'trying to write nullid to tag cache' - # This can't fail because of the first assert above. When/if we - # remove that assert, we might want to catch LookupError here - # and downgrade it to a warning. - rev = repo.changelog.rev(head) + if head not in tagfnode: + rev = repo.changelog.rev(head) + revs[rev] = hex(head) - fnode = tagfnode.get(head) - if fnode: - cachefile.write('%d %s %s\n' % (rev, hex(head), hex(fnode))) - else: - cachefile.write('%d %s\n' % (rev, hex(head))) + for rev, line in sorted(revs.items(), reverse=True): + cachefile.write('%d %s\n' % (rev, line)) # Tag names in the cache are in UTF-8 -- which is the whole reason # we keep them in UTF-8 throughout this module. If we converted # them local encoding on input, we would lose info writing them to diff --git a/tests/test-obsolete-tag-cache.t b/tests/test-obsolete-tag-cache.t --- a/tests/test-obsolete-tag-cache.t +++ b/tests/test-obsolete-tag-cache.t @@ -62,8 +62,9 @@ repopulation .hgtags filenodes for hidden heads should be visible (issue4550) $ cat .hg/cache/tags 7 eb610439e10e0c6b296f97b59624c2e24fc59e30 b3bce87817fe7ac9dca2834366c1d7534c095cf1 + 3 c3cb30f2d2cd0aae008cc91a07876e3c5131fd22 b3bce87817fe7ac9dca2834366c1d7534c095cf1 55482a6fb4b1881fa8f746fd52cf6f096bb21c89 test1 d75775ffbc6bca1794d300f5571272879bd280da test2