# HG changeset patch # User Greg Ward # Date 1247755182 14400 # Node ID f528d1a93491e7453cb015a72aeafae3dd226526 # Parent 09a1ee4987568e734eef3f2e0f20bc6d6d021f76 tags: implement persistent tag caching (issue548). - rename findglobaltags() to findglobaltags1() (so the "no cache" implementation is still there if we need it) - add findglobaltags2() and make findglobaltags() an alias for it (disabling tag caching is a one-line patch) - factor out tagcache class with methods readcache() and writecache(); the expensive part of tag finding (iterate over heads and find .hgtags filenode) is now in tagcache.readcache() diff -r 09a1ee498756 -r f528d1a93491 mercurial/localrepo.py --- a/mercurial/localrepo.py Thu Jul 16 10:39:41 2009 -0400 +++ b/mercurial/localrepo.py Thu Jul 16 10:39:42 2009 -0400 @@ -915,11 +915,20 @@ '''Inform the repository that nodes have been destroyed. Intended for use by strip and rollback, so there's a common place for anything that has to be done after destroying history.''' - # Do nothing for now: this is a placeholder that will be used - # when we add tag caching. # XXX it might be nice if we could take the list of destroyed # nodes, but I don't see an easy way for rollback() to do that - pass + + # Ensure the persistent tag cache is updated. Doing it now + # means that the tag cache only has to worry about destroyed + # heads immediately after a strip/rollback. That in turn + # guarantees that "cachetip == currenttip" (comparing both rev + # and node) always means no nodes have been added or destroyed. + + # XXX this is suboptimal when qrefresh'ing: we strip the current + # head, refresh the tag cache, then immediately add a new head. + # But I think doing it this way is necessary for the "instant + # tag cache retrieval" case to work. + tags_.findglobaltags(self.ui, self, {}, {}) def walk(self, match, node=None): ''' diff -r 09a1ee498756 -r f528d1a93491 mercurial/tags.py --- a/mercurial/tags.py Thu Jul 16 10:39:41 2009 -0400 +++ b/mercurial/tags.py Thu Jul 16 10:39:42 2009 -0400 @@ -6,16 +6,29 @@ # This software may be used and distributed according to the terms of the # GNU General Public License version 2, incorporated herein by reference. -# Currently this module only deals with reading tags. Soon it will grow -# support for caching tag info. Eventually, it could take care of -# updating (adding/removing/moving) tags too. +# Currently this module only deals with reading and caching tags. +# Eventually, it could take care of updating (adding/removing/moving) +# tags too. -from node import bin, hex +import os +from node import nullid, bin, hex, short from i18n import _ import encoding import error -def findglobaltags(ui, repo, alltags, tagtypes): +def _debugalways(ui, *msg): + ui.write(*msg) + +def _debugconditional(ui, *msg): + ui.debug(*msg) + +def _debugnever(ui, *msg): + pass + +_debug = _debugalways +_debug = _debugnever + +def findglobaltags1(ui, repo, alltags, tagtypes): '''Find global tags in repo by reading .hgtags from every head that has a distinct version of it. Updates the dicts alltags, tagtypes in place: alltags maps tag name to (node, hist) pair (see _readtags() @@ -44,6 +57,36 @@ ui, repo, fctx.data().splitlines(), fctx) _updatetags(filetags, "global", alltags, tagtypes) +def findglobaltags2(ui, repo, alltags, tagtypes): + '''Same as findglobaltags1(), but with caching.''' + (heads, tagfnode, shouldwrite) = _readtagcache(ui, repo) + + _debug(ui, "reading tags from %d head(s): %s\n" + % (len(heads), map(short, reversed(heads)))) + seen = set() # set of fnode + fctx = None + for head in reversed(heads): # oldest to newest + assert head in repo.changelog.nodemap, \ + "tag cache returned bogus head %s" % short(head) + + fnode = tagfnode.get(head) + if fnode and fnode not in seen: + seen.add(fnode) + if not fctx: + fctx = repo.filectx('.hgtags', fileid=fnode) + else: + fctx = fctx.filectx(fnode) + + filetags = _readtags(ui, repo, fctx.data().splitlines(), fctx) + _updatetags(filetags, 'global', alltags, tagtypes) + + # and update the cache (if necessary) + if shouldwrite: + _writetagcache(ui, repo, heads, tagfnode) + +# Set this to findglobaltags1 to disable tag caching. +findglobaltags = findglobaltags2 + def readlocaltags(ui, repo, alltags, tagtypes): '''Read local tags in repo. Update alltags and tagtypes.''' try: @@ -120,3 +163,148 @@ alltags[name] = anode, ahist tagtypes[name] = tagtype + +# The tag cache only stores info about heads, not the tag contents +# from each head. I.e. it doesn't try to squeeze out the maximum +# performance, but is simpler has a better chance of actually +# working correctly. And this gives the biggest performance win: it +# avoids looking up .hgtags in the manifest for every head, and it +# can avoid calling heads() at all if there have been no changes to +# the repo. + +def _readtagcache(ui, repo): + '''Read the tag cache and return a tuple (heads, fnodes, + shouldwrite). heads is the list of all heads currently in the + repository (ordered from tip to oldest) and fnodes is a mapping from + head to .hgtags filenode. Caller is responsible for reading tag + info from each head.''' + + try: + cachefile = repo.opener('tags.cache', 'r') + _debug(ui, 'reading tag cache from %s\n' % cachefile.name) + except IOError: + cachefile = None + + # The cache file consists of lines like + # [] + # where and redundantly identify a repository + # head from the time the cache was written, and is the + # filenode of .hgtags on that head. Heads with no .hgtags file will + # have no . The cache is ordered from tip to oldest (which + # is part of why is there: a quick visual check is all + # that's required to ensure correct order). + # + # This information is enough to let us avoid the most expensive part + # of finding global tags, which is looking up in the + # manifest for each head. + cacherevs = [] # list of headrev + cacheheads = [] # list of headnode + cachefnode = {} # map headnode to filenode + if cachefile: + for line in cachefile: + line = line.rstrip().split() + cacherevs.append(int(line[0])) + headnode = bin(line[1]) + cacheheads.append(headnode) + if len(line) == 3: + fnode = bin(line[2]) + cachefnode[headnode] = fnode + + cachefile.close() + + tipnode = repo.changelog.tip() + tiprev = len(repo.changelog) - 1 + + # Case 1 (common): tip is the same, so nothing has changed. + # (Unchanged tip trivially means no changesets have been added. + # But, thanks to localrepository.destroyed(), it also means none + # have been destroyed by strip or rollback.) + if cacheheads and cacheheads[0] == tipnode and cacherevs[0] == tiprev: + _debug(ui, "tag cache: tip unchanged\n") + return (cacheheads, cachefnode, False) + + repoheads = repo.heads() + + # Case 2 (uncommon): empty repo; get out quickly and don't bother + # writing an empty cache. + if repoheads == [nullid]: + return ([], {}, False) + + # Case 3 (uncommon): cache file missing or empty. + if not cacheheads: + _debug(ui, 'tag cache: cache file missing or empty\n') + + # Case 4 (uncommon): tip rev decreased. This should only happen + # when we're called from localrepository.destroyed(). Refresh the + # cache so future invocations will not see disappeared heads in the + # cache. + elif cacheheads and tiprev < cacherevs[0]: + _debug(ui, + 'tag cache: tip rev decremented (from %d to %d), ' + 'so we must be destroying nodes\n' + % (cacherevs[0], tiprev)) + + # Case 5 (common): tip has changed, so we've added/replaced heads. + else: + _debug(ui, + 'tag cache: tip has changed (%d:%s); must find new heads\n' + % (tiprev, short(tipnode))) + + # Luckily, the code to handle cases 3, 4, 5 is the same. So the + # above if/elif/else can disappear once we're confident this thing + # actually works and we don't need the debug output. + + # N.B. in case 4 (nodes destroyed), "new head" really means "newly + # exposed". + newheads = [head + for head in repoheads + if head not in set(cacheheads)] + _debug(ui, 'tag cache: found %d head(s) not in cache: %s\n' + % (len(newheads), map(short, newheads))) + + # Now we have to lookup the .hgtags filenode for every new head. + # This is the most expensive part of finding tags, so performance + # depends primarily on the size of newheads. Worst case: no cache + # file, so newheads == repoheads. + for head in newheads: + cctx = repo[head] + try: + fnode = cctx.filenode('.hgtags') + cachefnode[head] = fnode + except error.LookupError: + # no .hgtags file on this head + pass + + # Caller has to iterate over all heads, but can use the filenodes in + # cachefnode to get to each .hgtags revision quickly. + return (repoheads, cachefnode, True) + +def _writetagcache(ui, repo, heads, tagfnode): + + cachefile = repo.opener('tags.cache', 'w', atomictemp=True) + _debug(ui, 'writing cache file %s\n' % cachefile.name) + + realheads = repo.heads() # for sanity checks below + for head in heads: + # temporary sanity checks; these can probably be removed + # once this code has been in crew for a few weeks + assert head in repo.changelog.nodemap, \ + 'trying to write non-existent node %s to tag cache' % short(head) + assert head in realheads, \ + 'trying to write non-head %s to tag cache' % short(head) + assert head != nullid, \ + 'trying to write nullid to tag cache' + + # This can't fail because of the first assert above. When/if we + # remove that assert, we might want to catch LookupError here + # and downgrade it to a warning. + rev = repo.changelog.rev(head) + + fnode = tagfnode.get(head) + if fnode: + cachefile.write('%d %s %s\n' % (rev, hex(head), hex(fnode))) + else: + cachefile.write('%d %s\n' % (rev, hex(head))) + + cachefile.rename() + cachefile.close() diff -r 09a1ee498756 -r f528d1a93491 tests/test-mq --- a/tests/test-mq Thu Jul 16 10:39:41 2009 -0400 +++ b/tests/test-mq Thu Jul 16 10:39:42 2009 -0400 @@ -107,9 +107,18 @@ hg qpop checkundo qpop -echo % qpush +echo % qpush with dump of tag cache +# Dump the tag cache to ensure that it has exactly one head after qpush. +rm -f .hg/tags.cache +hg tags > /dev/null +echo ".hg/tags.cache (pre qpush):" +sed 's/ [0-9a-f]*//' .hg/tags.cache hg qpush +hg tags > /dev/null +echo ".hg/tags.cache (post qpush):" +sed 's/ [0-9a-f]*//' .hg/tags.cache + checkundo qpush cd .. diff -r 09a1ee498756 -r f528d1a93491 tests/test-mq.out --- a/tests/test-mq.out Thu Jul 16 10:39:41 2009 -0400 +++ b/tests/test-mq.out Thu Jul 16 10:39:42 2009 -0400 @@ -110,9 +110,13 @@ % qpop popping test.patch patch queue now empty -% qpush +% qpush with dump of tag cache +.hg/tags.cache (pre qpush): +1 applying test.patch now at: test.patch +.hg/tags.cache (post qpush): +2 % pop/push outside repo popping test.patch patch queue now empty diff -r 09a1ee498756 -r f528d1a93491 tests/test-tags --- a/tests/test-tags Thu Jul 16 10:39:41 2009 -0400 +++ b/tests/test-tags Thu Jul 16 10:39:42 2009 -0400 @@ -1,15 +1,22 @@ #!/bin/sh +cacheexists() { + [ -f .hg/tags.cache ] && echo "tag cache exists" || echo "no tag cache" +} + echo "% setup" mkdir t cd t hg init +cacheexists hg id +cacheexists echo a > a hg add a hg commit -m "test" hg co hg identify +cacheexists echo "% create local tag with long name" T=`hg identify --debug --id` @@ -25,6 +32,10 @@ hg tags hg identify +# repeat with cold tag cache +rm -f .hg/tags.cache +hg identify + echo "% create a branch" echo bb > a hg status diff -r 09a1ee498756 -r f528d1a93491 tests/test-tags.out --- a/tests/test-tags.out Thu Jul 16 10:39:41 2009 -0400 +++ b/tests/test-tags.out Thu Jul 16 10:39:42 2009 -0400 @@ -1,7 +1,10 @@ % setup +no tag cache 000000000000 tip +no tag cache 0 files updated, 0 files merged, 0 files removed, 0 files unresolved acb14030fe0a tip +tag cache exists % create local tag with long name tip 0:acb14030fe0a This is a local tag with a really long name! 0:acb14030fe0a @@ -10,6 +13,7 @@ tip 1:b9154636be93 first 0:acb14030fe0a b9154636be93 tip +b9154636be93 tip % create a branch M a b9154636be93+ tip @@ -73,7 +77,9 @@ rev 4: .hgtags: bbd179dfa0a71671c253b3ae0aa1513b60d199fa bar .hg/tags.cache: -no such file +4 0c192d7d5e6b78a714de54a2e9627952a877e25a 0c04f2a8af31de17fab7422878ee5a2dadbc943d +3 6fa450212aeb2a21ed616a54aea39a4a27894cd7 7d3b718c964ef37b89e550ebdafd5789e76ce1b0 +2 7a94127795a33c10a370c93f731fd9fea0b79af6 0c04f2a8af31de17fab7422878ee5a2dadbc943d % test tag removal changeset: 5:5f6e8655b1c7 tag: tip