@@ -174,8 +174,40 @@ def _updatetags(filetags, tagtype, allta
ahist.extend([n for n in bhist if n not in ahist])
alltags[name] = anode, ahist
+# There are currently 2 files related to the tags cache:
+# cache/tags and cache/hgtagsfnodes1. The former was the original
+# cache and consists of two caches: a mapping of changeset to
+# .hgtags filenodes and a caching of the result of tags resolution.
+# The hgtags fnodes cache is simply a mapping of changeset nodes to
+# .hgtags filenodes.
+#
+# The "tags" cache has known problems for users that have obsolescence
+# enabled.
+#
+# The "tags" cache is concerned with the heads for the repository. For
+# users of obsolescence, there are different sets of heads for the
+# repository: the non-hidden heads and the hidden heads. If tags
+# are computed against the normal/filtered repository, data about
+# hidden heads won't be stored in the cache. If tags against the
+# unfiltered/hidden heads are requested, the tags cache will need to
+# lookup data for these hidden heads. This could lead to extra
+# and potentially expensive computation, leading to significant
+# performance issues.
+#
+# Currently, we work around the issue with the "tags" cache throwing
+# away data on heads by supplementing that cache with a standalone
+# cache of changeset node to .hgtags filenode. Looking up .hgtags
+# filenodes can be expensive on large repositories, as it requires
+# manifests to be resolved. With a dedicated filenode cache file that
+# is immune to invalidation, we perform this lookup at most once
+# and avoid the costliest aspect of "tags" cache resolution and
+# invalidation.
+#
+# Ideally, there should be separate "tags" caches for each view
+# of a repository, like how branchmap caches work.
+#
# The tag cache only stores info about heads, not the tag contents
# from each head. I.e. it doesn't try to squeeze out the maximum
# performance, but is simpler has a better chance of actually
# working correctly. And this gives the biggest performance win: it
@@ -276,26 +308,36 @@ def _readtagcache(ui, repo):
newheads = [head
for head in repoheads
if head not in set(cacheheads)]
+ existingfnodes = _readhgtagsfnodescache(ui, repo)
+ manifestlookupcount = 0
+
# Now we have to lookup the .hgtags filenode for every new head.
# This is the most expensive part of finding tags, so performance
# depends primarily on the size of newheads. Worst case: no cache
# file, so newheads == repoheads.
for head in reversed(newheads):
+ # Look in the supplemental hgtags fnodes cache first.
+ fnode = existingfnodes.get(head)
+ if fnode:
+ cachefnode[head] = fnode
+ continue
+
cctx = repo[head]
try:
fnode = cctx.filenode('.hgtags')
cachefnode[head] = fnode
+ manifestlookupcount += 1
except error.LookupError:
# no .hgtags file on this head
pass
duration = time.time() - starttime
ui.log('tagscache',
'resolved %d tags cache entries from %d manifests in %0.4f '
'seconds\n',
- len(cachefnode), len(newheads), duration)
+ len(cachefnode), manifestlookupcount, duration)
# Caller has to iterate over all heads, but can use the filenodes in
# cachefnode to get to each .hgtags revision quickly.
return (repoheads, cachefnode, None, True)
@@ -345,4 +387,69 @@ def _writetagcache(ui, repo, heads, tagf
try:
cachefile.close()
except (OSError, IOError):
pass
+
+ _updatehgtagsfnodescache(ui, repo, tagfnode)
+
+_fnodescachefile = 'cache/hgtagsfnodes1'
+
+def _readhgtagsfnodescache(ui, repo):
+ """Read the cache mapping changeset nodes to .hgtags filenodes.
+
+ The cache consists of pairs of 20-byte nodes.
+
+ No validation of the entries is performed other than a spot test
+ that the file doesn't contain extra data. Since nodes are derived
+ from content and are deterministic, mappings are constant. The
+ only thing that can change is that a changeset may be stripped.
+ Callers must perform their own checking to ensure "unknown"
+ entries don't leak out.
+ """
+ data = repo.vfs.tryread(_fnodescachefile)
+ nodes = {}
+ l = len(data)
+ offset = 0
+ while offset + 40 <= l:
+ node = data[offset:offset + 20]
+ fnode = data[offset + 20:offset + 40]
+ nodes[node] = fnode
+ offset += 40
+
+ ui.log('tagscache',
+ 'read %d entries from hgtags filenodes cache\n',
+ len(nodes))
+
+ # If we have data left over, something wasn't written properly.
+ # We remove the invalid cache and throw away any data that was
+ # read since we can't trust it.
+ if offset != l:
+ ui.warn(_('.hg/cache/%s is corrupt; it will be rebuilt\n') %
+ _fnodescachefile)
+ repo.vfs.unlink(_fnodescachefile)
+ nodes = {}
+
+ return nodes
+
+def _updatehgtagsfnodescache(ui, repo, fnodes):
+ """Update the cache file mapping changeset nodes to .hgtags fnodes.
+
+ For now, all entries are preserved for all of time. In the future, we
+ should consider pruning this cache so its growth isn't unbounded.
+ """
+ existing = _readhgtagsfnodescache(ui, repo)
+
+ # Append new entries instead of re-writing existing content to reduce
+ # I/O.
+ missing = set(fnodes.keys()) - set(existing.keys())
+
+ entries = []
+ # sorted() is to make tests sane.
+ for node in sorted(missing):
+ fnode = fnodes[node]
+ entries.append('%s%s' % (node, fnode))
+
+ data = ''.join(entries)
+ repo.vfs.append(_fnodescachefile, data)
+ ui.log('tagscache',
+ 'appended %d entries to hgtags filenodes cache\n',
+ len(missing))
@@ -130,11 +130,13 @@ tags cache gets logged
$ hg tag -m 'create test tag' test-tag
$ hg tags
tip 3:5b5562c08298
test-tag 2:d02f48003e62
- $ hg blackbox -l 3
+ $ hg blackbox -l 5
1970/01/01 00:00:00 bob> resolved 1 tags cache entries from 1 manifests in ?.???? seconds (glob)
1970/01/01 00:00:00 bob> writing tags cache file with 2 heads and 1 tags
+ 1970/01/01 00:00:00 bob> read 0 entries from hgtags filenodes cache
+ 1970/01/01 00:00:00 bob> appended 1 entries to hgtags filenodes cache
1970/01/01 00:00:00 bob> tags exited 0 after ?.?? seconds (glob)
extension and python hooks - use the eol extension for a pythonhook
@@ -2,8 +2,11 @@ Helper functions:
$ cacheexists() {
> [ -f .hg/cache/tags ] && echo "tag cache exists" || echo "no tag cache"
> }
+ $ fnodescacheexists() {
+ > [ -f .hg/cache/hgtagsfnodes1 ] && echo "fnodes cache exists" || echo "no fnodes cache"
+ > }
$ dumptags() {
> rev=$1
> echo "rev $rev: .hgtags:"
@@ -19,12 +22,16 @@ Setup:
$ hg init t
$ cd t
$ cacheexists
no tag cache
+ $ fnodescacheexists
+ no fnodes cache
$ hg id
000000000000 tip
$ cacheexists
no tag cache
+ $ fnodescacheexists
+ no fnodes cache
$ echo a > a
$ hg add a
$ hg commit -m "test"
$ hg co
@@ -32,14 +39,18 @@ Setup:
$ hg identify
acb14030fe0a tip
$ cacheexists
tag cache exists
+ $ fnodescacheexists
+ fnodes cache exists
Try corrupting the cache
$ printf 'a b' > .hg/cache/tags
+ $ printf 'extra' > .hg/cache/hgtagsfnodes1
$ hg identify
.hg/cache/tags is corrupt, rebuilding it
+ .hg/cache/cache/hgtagsfnodes1 is corrupt; it will be rebuilt
acb14030fe0a tip
$ cacheexists
tag cache exists
$ hg identify
@@ -68,16 +79,16 @@ Create a tag behind hg's back:
b9154636be93 tip
Repeat with cold tag cache:
- $ rm -f .hg/cache/tags
+ $ rm -f .hg/cache/tags .hg/cache/hgtagsfnodes1
$ hg identify
b9154636be93 tip
And again, but now unable to write tag cache:
#if unix-permissions
- $ rm -f .hg/cache/tags
+ $ rm -f .hg/cache/tags .hg/cache/hgtagsfnodes1
$ chmod 555 .hg
$ hg identify
b9154636be93 tip
$ chmod 755 .hg
@@ -327,9 +338,9 @@ Strip 2: destroy whole branch, no old he
saved backup bundle to $TESTTMP/t3/.hg/strip-backup/*-backup.hg (glob)
$ hg tags # partly stale
tip 4:735c3ca72986
bar 0:bbd179dfa0a7
- $ rm -f .hg/cache/tags
+ $ rm -f .hg/cache/tags .hg/cache/hgtagsfnodes1
$ hg tags # cold cache
tip 4:735c3ca72986
bar 0:bbd179dfa0a7