tags: change format of tags cache files
author Gregory Szorc <gregory.szorc@gmail.com>
Thu, 16 Apr 2015 12:01:00 -0400
changeset 24760 410f3856196f
parent 24759 d082c6ef9ec3
child 24761 61a6d83280d3
tags: change format of tags cache files

.hgtags fnodes are now written to a shared cache file. They don't need to exist in the per-filter tags cache files. Stop writing them.

The format of the tags cache file has changed in a backwards incompatible way. This should be acceptable, as we just established per-filter tags cache files and no client should have per-filter tags cache files that will need to be read. So no backwards compatibility concern is present.

The new format has a single header line followed by resolved tags entries. The header line is similar to the old first line, with a major difference: we now compute and store a hash of the filtered revisions. Before, if the set of filtered revs changed, we could return incorrect results. We now detect that.

A test verifying that a change in the set of filtered revs is handled properly has been added.
mercurial/tags.py
tests/test-blackbox.t
tests/test-mq.t
tests/test-obsolete-tag-cache.t
tests/test-tags.t
--- a/mercurial/tags.py	Thu Apr 16 10:12:44 2015 -0400
+++ b/mercurial/tags.py	Thu Apr 16 12:01:00 2015 -0400
@@ -36,32 +36,23 @@
 # repositories with very large manifests. Multiplied by dozens or even
 # hundreds of heads and there is a significant performance concern.
 #
-# The "tags" cache stores information about heads and the history of tags.
-#
-# The cache file consists of two parts. The first part maps head nodes
-# to .hgtags filenodes. The second part is a history of tags. The two
-# parts are separated by an empty line.
+# The "tags" cache stores information about the history of tags.
 #
-# The filenodes part of "tags" has effectively been superseded by
-# "hgtagsfnodes1." It is being kept around for backwards compatbility.
+# The cache file consists of a cache validation line followed by a history
+# of tags.
 #
-# The first part consists of lines of the form:
+# The cache validation line has the format:
 #
-#   <headrev> <headnode> [<hgtagsnode>]
+#   <tiprev> <tipnode> [<filteredhash>]
 #
-# <headrev> is an integer revision and <headnode> is a 40 character hex
-# node for that changeset. These redundantly identify a repository
-# head from the time the cache was written.
-#
-# <tagnode> is the filenode of .hgtags on that head. Heads with no .hgtags
-# file will have no <hgtagsnode> (just 2 values per line).
+# <tiprev> is an integer revision and <tipnode> is a 40 character hex
+# node for that changeset. These redundantly identify the repository
+# tip from the time the cache was written. In addition, <filteredhash>,
+# if present, is a 40 character hex hash of the contents of the filtered
+# revisions for this filter. If the set of filtered revs changes, the
+# hash will change and invalidate the cache.
 #
-# The filenode cache is ordered from tip to oldest (which is part of why
-# <headrev> is there: a quick check of the tip from when the cache was
-# written against the current tip is all that is needed to check whether
-# the cache is up to date).
-#
-# The second part of the tags cache consists of lines of the form:
+# The history part of the tags cache consists of lines of the form:
 #
 #   <node> <tag>
 #
@@ -94,7 +85,7 @@
     assert len(alltags) == len(tagtypes) == 0, \
            "findglobaltags() should be called first"
 
-    (heads, tagfnode, cachetags, shouldwrite) = _readtagcache(ui, repo)
+    (heads, tagfnode, valid, cachetags, shouldwrite) = _readtagcache(ui, repo)
     if cachetags is not None:
         assert not shouldwrite
         # XXX is this really 100% correct?  are there oddball special
@@ -122,7 +113,7 @@
 
     # and update the cache (if necessary)
     if shouldwrite:
-        _writetagcache(ui, repo, heads, tagfnode, alltags)
+        _writetagcache(ui, repo, valid, alltags)
 
 def readlocaltags(ui, repo, alltags, tagtypes):
     '''Read local tags in repo. Update alltags and tagtypes.'''
@@ -256,20 +247,22 @@
 def _readtagcache(ui, repo):
     '''Read the tag cache.
 
-    Returns a tuple (heads, fnodes, cachetags, shouldwrite).
+    Returns a tuple (heads, fnodes, validinfo, cachetags, shouldwrite).
 
     If the cache is completely up-to-date, "cachetags" is a dict of the
-    form returned by _readtags() and "heads" and "fnodes" are None and
-    "shouldwrite" is False.
+    form returned by _readtags() and "heads", "fnodes", and "validinfo" are
+    None and "shouldwrite" is False.
 
     If the cache is not up to date, "cachetags" is None. "heads" is a list
     of all heads currently in the repository, ordered from tip to oldest.
-    "fnodes" is a mapping from head to .hgtags filenode. "shouldwrite" is
-    True.
+    "validinfo" is a tuple describing cache validation info. This is used
+    when writing the tags cache. "fnodes" is a mapping from head to .hgtags
+    filenode. "shouldwrite" is True.
 
     If the cache is not up to date, the caller is responsible for reading tag
     info from each returned head. (See findglobaltags().)
     '''
+    import scmutil  # avoid cycle
 
     try:
         cachefile = repo.vfs(_filename(repo), 'r')
@@ -278,20 +271,17 @@
     except IOError:
         cachefile = None
 
-    cachetiprev = None
-    cachetipnode = None
+    cacherev = None
+    cachenode = None
+    cachehash = None
     if cachefile:
         try:
-            for i, line in enumerate(cachelines):
-                # Getting the first line and consuming all fnode lines.
-                if line == "\n":
-                    break
-                if i != 0:
-                    continue
-
-                line = line.split()
-                cachetiprev = int(line[0])
-                cachetipnode = bin(line[1])
+            validline = cachelines.next()
+            validline = validline.split()
+            cacherev = int(validline[0])
+            cachenode = bin(validline[1])
+            if len(validline) > 2:
+                cachehash = bin(validline[2])
         except Exception:
             # corruption of the cache, just recompute it.
             pass
@@ -303,20 +293,22 @@
     # (Unchanged tip trivially means no changesets have been added.
     # But, thanks to localrepository.destroyed(), it also means none
     # have been destroyed by strip or rollback.)
-    if (cachetiprev is not None
-            and cachetiprev == tiprev
-            and cachetipnode == tipnode):
+    if (cacherev == tiprev
+            and cachenode == tipnode
+            and cachehash == scmutil.filteredhash(repo, tiprev)):
         tags = _readtags(ui, repo, cachelines, cachefile.name)
         cachefile.close()
-        return (None, None, tags, False)
+        return (None, None, None, tags, False)
     if cachefile:
         cachefile.close()               # ignore rest of file
 
+    valid = (tiprev, tipnode, scmutil.filteredhash(repo, tiprev))
+
     repoheads = repo.heads()
     # Case 2 (uncommon): empty repo; get out quickly and don't bother
     # writing an empty cache.
     if repoheads == [nullid]:
-        return ([], {}, {}, False)
+        return ([], {}, valid, {}, False)
 
     # Case 3 (uncommon): cache file missing or empty.
 
@@ -334,7 +326,7 @@
     if not len(repo.file('.hgtags')):
         # No tags have ever been committed, so we can avoid a
         # potentially expensive search.
-        return (repoheads, {}, None, True)
+        return (repoheads, {}, valid, None, True)
 
     starttime = time.time()
 
@@ -359,44 +351,26 @@
 
     # Caller has to iterate over all heads, but can use the filenodes in
     # cachefnode to get to each .hgtags revision quickly.
-    return (repoheads, cachefnode, None, True)
+    return (repoheads, cachefnode, valid, None, True)
 
-def _writetagcache(ui, repo, heads, tagfnode, cachetags):
+def _writetagcache(ui, repo, valid, cachetags):
     try:
         cachefile = repo.vfs(_filename(repo), 'w', atomictemp=True)
     except (OSError, IOError):
         return
 
-    ui.log('tagscache', 'writing tags cache file with %d heads and %d tags\n',
-            len(heads), len(cachetags))
+    ui.log('tagscache', 'writing tags cache file with %d tags\n',
+           len(cachetags))
 
-    realheads = repo.heads()            # for sanity checks below
-    for head in heads:
-        # temporary sanity checks; these can probably be removed
-        # once this code has been in crew for a few weeks
-        assert head in repo.changelog.nodemap, \
-               'trying to write non-existent node %s to tag cache' % short(head)
-        assert head in realheads, \
-               'trying to write non-head %s to tag cache' % short(head)
-        assert head != nullid, \
-               'trying to write nullid to tag cache'
-
-        # This can't fail because of the first assert above.  When/if we
-        # remove that assert, we might want to catch LookupError here
-        # and downgrade it to a warning.
-        rev = repo.changelog.rev(head)
-
-        fnode = tagfnode.get(head)
-        if fnode:
-            cachefile.write('%d %s %s\n' % (rev, hex(head), hex(fnode)))
-        else:
-            cachefile.write('%d %s\n' % (rev, hex(head)))
+    if valid[2]:
+        cachefile.write('%d %s %s\n' % (valid[0], hex(valid[1]), hex(valid[2])))
+    else:
+        cachefile.write('%d %s\n' % (valid[0], hex(valid[1])))
 
     # Tag names in the cache are in UTF-8 -- which is the whole reason
     # we keep them in UTF-8 throughout this module.  If we converted
     # them local encoding on input, we would lose info writing them to
     # the cache.
-    cachefile.write('\n')
     for (name, (node, hist)) in sorted(cachetags.iteritems()):
         for n in hist:
             cachefile.write("%s %s\n" % (hex(n), name))
--- a/tests/test-blackbox.t	Thu Apr 16 10:12:44 2015 -0400
+++ b/tests/test-blackbox.t	Thu Apr 16 12:01:00 2015 -0400
@@ -123,7 +123,7 @@
   1 files updated, 0 files merged, 0 files removed, 0 files unresolved
   $ hg blackbox -l 5
   1970/01/01 00:00:00 bob> update
-  1970/01/01 00:00:00 bob> writing tags cache file with 2 heads and 0 tags
+  1970/01/01 00:00:00 bob> writing tags cache file with 0 tags
   1970/01/01 00:00:00 bob> pythonhook-preupdate: hgext.eol.preupdate finished in * seconds (glob)
   1970/01/01 00:00:00 bob> exthook-update: echo hooked finished in * seconds (glob)
   1970/01/01 00:00:00 bob> update exited 0 after * seconds (glob)
--- a/tests/test-mq.t	Thu Apr 16 10:12:44 2015 -0400
+++ b/tests/test-mq.t	Thu Apr 16 12:01:00 2015 -0400
@@ -318,7 +318,6 @@
 
   $ cat .hg/cache/tags-visible
   1 [\da-f]{40} (re)
-  
   $ hg qpush
   applying test.patch
   now at: test.patch
@@ -330,7 +329,6 @@
 
   $ cat .hg/cache/tags-visible
   2 [\da-f]{40} (re)
-  
   $ checkundo qpush
   $ cd ..
 
--- a/tests/test-obsolete-tag-cache.t	Thu Apr 16 10:12:44 2015 -0400
+++ b/tests/test-obsolete-tag-cache.t	Thu Apr 16 12:01:00 2015 -0400
@@ -1,6 +1,8 @@
   $ cat >> $HGRCPATH << EOF
   > [extensions]
+  > blackbox=
   > rebase=
+  > mock=$TESTDIR/mockblackbox.py
   > 
   > [experimental]
   > evolution = createmarkers
@@ -21,11 +23,12 @@
   $ hg commit -A -m newhead
   adding newhead
   created new head
-
-Trigger tags cache population by doing something that accesses tags info
+  $ hg tag -m 'test head 2 tag' head2
 
   $ hg log -G -T '{rev}:{node|short} {tags} {desc}\n'
-  @  4:042eb6bfcc49 tip newhead
+  @  5:2942a772f72a tip test head 2 tag
+  |
+  o  4:042eb6bfcc49 head2 newhead
   |
   | o  3:c3cb30f2d2cd  test2 tag
   | |
@@ -36,35 +39,55 @@
   o  0:55482a6fb4b1 test1 initial
   
 
+Trigger tags cache population by doing something that accesses tags info
+
+  $ hg tags
+  tip                                5:2942a772f72a
+  head2                              4:042eb6bfcc49
+  test2                              2:d75775ffbc6b
+  test1                              0:55482a6fb4b1
+
   $ cat .hg/cache/tags-visible
-  4 042eb6bfcc4909bad84a1cbf6eb1ddf0ab587d41
-  3 c3cb30f2d2cd0aae008cc91a07876e3c5131fd22 b3bce87817fe7ac9dca2834366c1d7534c095cf1
-  
+  5 2942a772f72a444bef4bef13874d515f50fa27b6
+  042eb6bfcc4909bad84a1cbf6eb1ddf0ab587d41 head2
   55482a6fb4b1881fa8f746fd52cf6f096bb21c89 test1
   d75775ffbc6bca1794d300f5571272879bd280da test2
 
-Create some hidden changesets via a rebase and trigger tags cache
-repopulation
+Hiding a non-tip changeset should change filtered hash and cause tags recompute
+
+  $ hg debugobsolete -d '0 0' c3cb30f2d2cd0aae008cc91a07876e3c5131fd22 -u dummyuser
 
-  $ hg -q rebase -s 1 -d 4
-  $ hg log -G -T '{rev}:{node|short} {tags} {desc}\n'
-  o  7:eb610439e10e tip test2 tag
-  |
-  o  6:7b4af00c3c83  first
-  |
-  o  5:43ac2a539b3c  test tag
-  |
-  @  4:042eb6bfcc49  newhead
-  |
-  o  0:55482a6fb4b1 test1 initial
-  
-
-.hgtags filenodes for hidden heads should be visible (issue4550)
-(currently broken)
+  $ hg tags
+  tip                                5:2942a772f72a
+  head2                              4:042eb6bfcc49
+  test1                              0:55482a6fb4b1
 
   $ cat .hg/cache/tags-visible
-  7 eb610439e10e0c6b296f97b59624c2e24fc59e30 b3bce87817fe7ac9dca2834366c1d7534c095cf1
-  
+  5 2942a772f72a444bef4bef13874d515f50fa27b6 f34fbc9a9769ba9eff5aff3d008a6b49f85c08b1
+  042eb6bfcc4909bad84a1cbf6eb1ddf0ab587d41 head2
   55482a6fb4b1881fa8f746fd52cf6f096bb21c89 test1
-  d75775ffbc6bca1794d300f5571272879bd280da test2
+
+  $ hg blackbox -l 4
+  1970/01/01 00:00:00 bob> tags
+  1970/01/01 00:00:00 bob> 2/2 cache hits/lookups in * seconds (glob)
+  1970/01/01 00:00:00 bob> writing tags cache file with 2 tags
+  1970/01/01 00:00:00 bob> tags exited 0 after * seconds (glob)
+
+Hiding another changeset should cause the filtered hash to change
+
+  $ hg debugobsolete -d '0 0' d75775ffbc6bca1794d300f5571272879bd280da -u dummyuser
+  $ hg debugobsolete -d '0 0' 5f97d42da03fd56f3b228b03dfe48af5c0adf75b -u dummyuser
 
+  $ hg tags
+  tip                                5:2942a772f72a
+  head2                              4:042eb6bfcc49
+
+  $ cat .hg/cache/tags-visible
+  5 2942a772f72a444bef4bef13874d515f50fa27b6 2fce1eec33263d08a4d04293960fc73a555230e4
+  042eb6bfcc4909bad84a1cbf6eb1ddf0ab587d41 head2
+
+  $ hg blackbox -l 4
+  1970/01/01 00:00:00 bob> tags
+  1970/01/01 00:00:00 bob> 1/1 cache hits/lookups in * seconds (glob)
+  1970/01/01 00:00:00 bob> writing tags cache file with 1 tags
+  1970/01/01 00:00:00 bob> tags exited 0 after * seconds (glob)
--- a/tests/test-tags.t	Thu Apr 16 10:12:44 2015 -0400
+++ b/tests/test-tags.t	Thu Apr 16 12:01:00 2015 -0400
@@ -133,7 +133,7 @@
   1970/01/01 00:00:00 bob> identify
   1970/01/01 00:00:00 bob> writing 48 bytes to cache/hgtagsfnodes1
   1970/01/01 00:00:00 bob> 0/1 cache hits/lookups in * seconds (glob)
-  1970/01/01 00:00:00 bob> writing tags cache file with 1 heads and 1 tags
+  1970/01/01 00:00:00 bob> writing tags cache file with 1 tags
   1970/01/01 00:00:00 bob> identify exited 0 after ?.?? seconds (glob)
 
 Failure to acquire lock results in no write
@@ -146,7 +146,7 @@
   1970/01/01 00:00:00 bob> identify
   1970/01/01 00:00:00 bob> not writing .hg/cache/hgtagsfnodes1 because lock held
   1970/01/01 00:00:00 bob> 0/1 cache hits/lookups in * seconds (glob)
-  1970/01/01 00:00:00 bob> writing tags cache file with 1 heads and 1 tags
+  1970/01/01 00:00:00 bob> writing tags cache file with 1 tags
   1970/01/01 00:00:00 bob> identify exited 0 after * seconds (glob)
 
   $ fnodescacheexists
@@ -312,10 +312,7 @@
 Dump cache:
 
   $ cat .hg/cache/tags-visible
-  4 0c192d7d5e6b78a714de54a2e9627952a877e25a 0c04f2a8af31de17fab7422878ee5a2dadbc943d
-  3 6fa450212aeb2a21ed616a54aea39a4a27894cd7 7d3b718c964ef37b89e550ebdafd5789e76ce1b0
-  2 7a94127795a33c10a370c93f731fd9fea0b79af6 0c04f2a8af31de17fab7422878ee5a2dadbc943d
-  
+  4 0c192d7d5e6b78a714de54a2e9627952a877e25a
   bbd179dfa0a71671c253b3ae0aa1513b60d199fa bar
   bbd179dfa0a71671c253b3ae0aa1513b60d199fa bar
   78391a272241d70354aa14c874552cad6b51bb42 bar
@@ -346,7 +343,7 @@
   1970/01/01 00:00:00 bob> tags
   1970/01/01 00:00:00 bob> writing 24 bytes to cache/hgtagsfnodes1
   1970/01/01 00:00:00 bob> 2/3 cache hits/lookups in * seconds (glob)
-  1970/01/01 00:00:00 bob> writing tags cache file with 3 heads and 1 tags
+  1970/01/01 00:00:00 bob> writing tags cache file with 1 tags
   1970/01/01 00:00:00 bob> tags exited 0 after * seconds (glob)
 
 #if unix-permissions no-root
@@ -366,7 +363,7 @@
   1970/01/01 00:00:00 bob> tags
   1970/01/01 00:00:00 bob> couldn't write cache/hgtagsfnodes1: [Errno 13] Permission denied: '$TESTTMP/t2/.hg/cache/hgtagsfnodes1'
   1970/01/01 00:00:00 bob> 2/3 cache hits/lookups in * seconds (glob)
-  1970/01/01 00:00:00 bob> writing tags cache file with 3 heads and 1 tags
+  1970/01/01 00:00:00 bob> writing tags cache file with 1 tags
   1970/01/01 00:00:00 bob> tags exited 0 after * seconds (glob)
 
   $ chmod a+w .hg/cache/hgtagsfnodes1
@@ -381,7 +378,7 @@
   1970/01/01 00:00:00 bob> tags
   1970/01/01 00:00:00 bob> writing 24 bytes to cache/hgtagsfnodes1
   1970/01/01 00:00:00 bob> 2/3 cache hits/lookups in * seconds (glob)
-  1970/01/01 00:00:00 bob> writing tags cache file with 3 heads and 1 tags
+  1970/01/01 00:00:00 bob> writing tags cache file with 1 tags
   1970/01/01 00:00:00 bob> tags exited 0 after * seconds (glob)
 
   $ f --size .hg/cache/hgtagsfnodes1
@@ -397,7 +394,7 @@
   $ hg blackbox -l 4
   1970/01/01 00:00:00 bob> tags
   1970/01/01 00:00:00 bob> 3/3 cache hits/lookups in * seconds (glob)
-  1970/01/01 00:00:00 bob> writing tags cache file with 3 heads and 1 tags
+  1970/01/01 00:00:00 bob> writing tags cache file with 1 tags
   1970/01/01 00:00:00 bob> tags exited 0 after * seconds (glob)
 
   $ f --size .hg/cache/hgtagsfnodes1
@@ -414,7 +411,7 @@
   1970/01/01 00:00:00 bob> tags
   1970/01/01 00:00:00 bob> writing 24 bytes to cache/hgtagsfnodes1
   1970/01/01 00:00:00 bob> 2/3 cache hits/lookups in * seconds (glob)
-  1970/01/01 00:00:00 bob> writing tags cache file with 3 heads and 1 tags
+  1970/01/01 00:00:00 bob> writing tags cache file with 1 tags
   1970/01/01 00:00:00 bob> tags exited 0 after * seconds (glob)
   $ f --size .hg/cache/hgtagsfnodes1
   .hg/cache/hgtagsfnodes1: size=144