mercurial/tags.py
changeset 24445 c71edbafe603
parent 24143 7b09dbbbd502
child 24532 f5de208a635c
equal deleted inserted replaced
24444:27e3ba73fbb1 24445:c71edbafe603
    16 import encoding
    16 import encoding
    17 import error
    17 import error
    18 import errno
    18 import errno
    19 import time
    19 import time
    20 
    20 
       
    21 # The tags cache stores information about heads and the history of tags.
       
    22 #
       
    23 # The cache file consists of two parts. The first part maps head nodes
       
    24 # to .hgtags filenodes. The second part is a history of tags. The two
       
    25 # parts are separated by an empty line.
       
    26 #
       
    27 # The first part consists of lines of the form:
       
    28 #
       
    29 #   <headrev> <headnode> [<hgtagsnode>]
       
    30 #
       
    31 # <headrev> is an integer revision and <headnode> is a 40 character hex
       
    32 # node for that changeset. These redundantly identify a repository
       
    33 # head from the time the cache was written.
       
    34 #
       
    35 # <hgtagsnode> is the filenode of .hgtags on that head. Heads with no .hgtags
       
    36 # file will have no <hgtagsnode> (just 2 values per line).
       
    37 #
       
    38 # The filenode cache is ordered from tip to oldest (which is part of why
       
    39 # <headrev> is there: a quick check of the tip from when the cache was
       
    40 # written against the current tip is all that is needed to check whether
       
    41 # the cache is up to date).
       
    42 #
       
    43 # The purpose of the filenode cache is to avoid the most expensive part
       
    44 # of finding global tags, which is looking up the .hgtags filenode in the
       
    45 # manifest for each head. This can take over a minute on repositories
       
    46 # that have large manifests and many heads.
       
    47 #
       
    48 # The second part of the tags cache consists of lines of the form:
       
    49 #
       
    50 #   <node> <tag>
       
    51 #
       
    52 # (This format is identical to that of .hgtags files.)
       
    53 #
       
    54 # <tag> is the tag name and <node> is the 40 character hex changeset
       
    55 # the tag is associated with.
       
    56 #
       
    57 # Tags are written sorted by tag name.
       
    58 #
       
    59 # Tags associated with multiple changesets have an entry for each changeset.
       
    60 # The most recent changeset (in terms of revlog ordering for the head
       
    61 # setting it) for each tag is last.
       
    62 
    21 def findglobaltags(ui, repo, alltags, tagtypes):
    63 def findglobaltags(ui, repo, alltags, tagtypes):
    22     '''Find global tags in repo by reading .hgtags from every head that
    64     '''Find global tags in a repo.
    23     has a distinct version of it, using a cache to avoid excess work.
    65 
    24     Updates the dicts alltags, tagtypes in place: alltags maps tag name
    66     "alltags" maps tag name to (node, hist) 2-tuples.
    25     to (node, hist) pair (see _readtags() below), and tagtypes maps tag
    67 
    26     name to tag type ("global" in this case).'''
    68     "tagtypes" maps tag name to tag type. Global tags always have the
       
    69     "global" tag type.
       
    70 
       
    71     The "alltags" and "tagtypes" dicts are updated in place. Empty dicts
       
    72     should be passed in.
       
    73 
       
    74     The tags cache is read and updated as a side-effect of calling.
       
    75     '''
    27     # This is so we can be lazy and assume alltags contains only global
    76     # This is so we can be lazy and assume alltags contains only global
    28     # tags when we pass it to _writetagcache().
    77     # tags when we pass it to _writetagcache().
    29     assert len(alltags) == len(tagtypes) == 0, \
    78     assert len(alltags) == len(tagtypes) == 0, \
    30            "findglobaltags() should be called first"
    79            "findglobaltags() should be called first"
    31 
    80 
    36         # cases where a global tag should outrank a local tag but won't,
    85         # cases where a global tag should outrank a local tag but won't,
    37         # because cachetags does not contain rank info?
    86         # because cachetags does not contain rank info?
    38         _updatetags(cachetags, 'global', alltags, tagtypes)
    87         _updatetags(cachetags, 'global', alltags, tagtypes)
    39         return
    88         return
    40 
    89 
    41     seen = set()                    # set of fnode
    90     seen = set()  # set of fnode
    42     fctx = None
    91     fctx = None
    43     for head in reversed(heads):        # oldest to newest
    92     for head in reversed(heads):  # oldest to newest
    44         assert head in repo.changelog.nodemap, \
    93         assert head in repo.changelog.nodemap, \
    45                "tag cache returned bogus head %s" % short(head)
    94                "tag cache returned bogus head %s" % short(head)
    46 
    95 
    47         fnode = tagfnode.get(head)
    96         fnode = tagfnode.get(head)
    48         if fnode and fnode not in seen:
    97         if fnode and fnode not in seen:
    58     # and update the cache (if necessary)
   107     # and update the cache (if necessary)
    59     if shouldwrite:
   108     if shouldwrite:
    60         _writetagcache(ui, repo, heads, tagfnode, alltags)
   109         _writetagcache(ui, repo, heads, tagfnode, alltags)
    61 
   110 
    62 def readlocaltags(ui, repo, alltags, tagtypes):
   111 def readlocaltags(ui, repo, alltags, tagtypes):
    63     '''Read local tags in repo.  Update alltags and tagtypes.'''
   112     '''Read local tags in repo. Update alltags and tagtypes.'''
    64     try:
   113     try:
    65         data = repo.vfs.read("localtags")
   114         data = repo.vfs.read("localtags")
    66     except IOError, inst:
   115     except IOError, inst:
    67         if inst.errno != errno.ENOENT:
   116         if inst.errno != errno.ENOENT:
    68             raise
   117             raise
    84 
   133 
    85     _updatetags(filetags, "local", alltags, tagtypes)
   134     _updatetags(filetags, "local", alltags, tagtypes)
    86 
   135 
    87 def _readtaghist(ui, repo, lines, fn, recode=None, calcnodelines=False):
   136 def _readtaghist(ui, repo, lines, fn, recode=None, calcnodelines=False):
    88     '''Read tag definitions from a file (or any source of lines).
   137     '''Read tag definitions from a file (or any source of lines).
       
   138 
    89     This function returns two sortdicts with similar information:
   139     This function returns two sortdicts with similar information:
       
   140 
    90     - the first dict, bintaghist, contains the tag information as expected by
   141     - the first dict, bintaghist, contains the tag information as expected by
    91       the _readtags function, i.e. a mapping from tag name to (node, hist):
   142       the _readtags function, i.e. a mapping from tag name to (node, hist):
    92         - node is the node id from the last line read for that name,
   143         - node is the node id from the last line read for that name,
    93         - hist is the list of node ids previously associated with it (in file
   144         - hist is the list of node ids previously associated with it (in file
    94           order).  All node ids are binary, not hex.
   145           order). All node ids are binary, not hex.
       
   146 
    95     - the second dict, hextaglines, is a mapping from tag name to a list of
   147     - the second dict, hextaglines, is a mapping from tag name to a list of
    96       [hexnode, line number] pairs, ordered from the oldest to the newest node.
   148       [hexnode, line number] pairs, ordered from the oldest to the newest node.
       
   149 
    97     When calcnodelines is False the hextaglines dict is not calculated (an
   150     When calcnodelines is False the hextaglines dict is not calculated (an
    98     empty dict is returned). This is done to improve this function's
   151     empty dict is returned). This is done to improve this function's
    99     performance in cases where the line numbers are not needed.
   152     performance in cases where the line numbers are not needed.
   100     '''
   153     '''
   101 
   154 
   137         bintaghist[name].append(nodebin)
   190         bintaghist[name].append(nodebin)
   138     return bintaghist, hextaglines
   191     return bintaghist, hextaglines
   139 
   192 
   140 def _readtags(ui, repo, lines, fn, recode=None, calcnodelines=False):
   193 def _readtags(ui, repo, lines, fn, recode=None, calcnodelines=False):
   141     '''Read tag definitions from a file (or any source of lines).
   194     '''Read tag definitions from a file (or any source of lines).
   142     Return a mapping from tag name to (node, hist): node is the node id
   195 
   143     from the last line read for that name, and hist is the list of node
   196     Returns a mapping from tag name to (node, hist).
   144     ids previously associated with it (in file order).  All node ids are
   197 
   145     binary, not hex.'''
   198     "node" is the node id from the last line read for that name. "hist"
       
   199     is the list of node ids previously associated with it (in file order).
       
   200     All node ids are binary, not hex.
       
   201     '''
   146     filetags, nodelines = _readtaghist(ui, repo, lines, fn, recode=recode,
   202     filetags, nodelines = _readtaghist(ui, repo, lines, fn, recode=recode,
   147                                        calcnodelines=calcnodelines)
   203                                        calcnodelines=calcnodelines)
   148     for tag, taghist in filetags.items():
   204     for tag, taghist in filetags.items():
   149         filetags[tag] = (taghist[-1], taghist[:-1])
   205         filetags[tag] = (taghist[-1], taghist[:-1])
   150     return filetags
   206     return filetags
   172         else:
   228         else:
   173             tagtypes[name] = tagtype
   229             tagtypes[name] = tagtype
   174         ahist.extend([n for n in bhist if n not in ahist])
   230         ahist.extend([n for n in bhist if n not in ahist])
   175         alltags[name] = anode, ahist
   231         alltags[name] = anode, ahist
   176 
   232 
   177 
       
   178 # The tag cache only stores info about heads, not the tag contents
       
   179 # from each head.  I.e. it doesn't try to squeeze out the maximum
       
   180 # performance, but is simpler and has a better chance of actually
       
   181 # working correctly.  And this gives the biggest performance win: it
       
   182 # avoids looking up .hgtags in the manifest for every head, and it
       
   183 # can avoid calling heads() at all if there have been no changes to
       
   184 # the repo.
       
   185 
       
   186 def _readtagcache(ui, repo):
   233 def _readtagcache(ui, repo):
   187     '''Read the tag cache and return a tuple (heads, fnodes, cachetags,
   234     '''Read the tag cache.
   188     shouldwrite).  If the cache is completely up-to-date, cachetags is a
   235 
   189     dict of the form returned by _readtags(); otherwise, it is None and
   236     Returns a tuple (heads, fnodes, cachetags, shouldwrite).
   190     heads and fnodes are set.  In that case, heads is the list of all
   237 
   191     heads currently in the repository (ordered from tip to oldest) and
   238     If the cache is completely up-to-date, "cachetags" is a dict of the
   192     fnodes is a mapping from head to .hgtags filenode.  If those two are
   239     form returned by _readtags() and "heads" and "fnodes" are None and
   193     set, caller is responsible for reading tag info from each head.'''
   240     "shouldwrite" is False.
       
   241 
       
   242     If the cache is not up to date, "cachetags" is None. "heads" is a list
       
   243     of all heads currently in the repository, ordered from tip to oldest.
       
   244     "fnodes" is a mapping from head to .hgtags filenode. "shouldwrite" is
       
   245     True.
       
   246 
       
   247     If the cache is not up to date, the caller is responsible for reading tag
       
   248     info from each returned head. (See findglobaltags().)
       
   249     '''
   194 
   250 
   195     try:
   251     try:
   196         cachefile = repo.vfs('cache/tags', 'r')
   252         cachefile = repo.vfs('cache/tags', 'r')
   197         # force reading the file for static-http
   253         # force reading the file for static-http
   198         cachelines = iter(cachefile)
   254         cachelines = iter(cachefile)
   199     except IOError:
   255     except IOError:
   200         cachefile = None
   256         cachefile = None
   201 
   257 
   202     # The cache file consists of lines like
   258     cacherevs = []  # list of headrev
   203     #   <headrev> <headnode> [<tagnode>]
   259     cacheheads = [] # list of headnode
   204     # where <headrev> and <headnode> redundantly identify a repository
   260     cachefnode = {} # map headnode to filenode
   205     # head from the time the cache was written, and <tagnode> is the
       
   206     # filenode of .hgtags on that head.  Heads with no .hgtags file will
       
   207     # have no <tagnode>.  The cache is ordered from tip to oldest (which
       
   208     # is part of why <headrev> is there: a quick visual check is all
       
   209     # that's required to ensure correct order).
       
   210     #
       
   211     # This information is enough to let us avoid the most expensive part
       
   212     # of finding global tags, which is looking up <tagnode> in the
       
   213     # manifest for each head.
       
   214     cacherevs = []                      # list of headrev
       
   215     cacheheads = []                     # list of headnode
       
   216     cachefnode = {}                     # map headnode to filenode
       
   217     if cachefile:
   261     if cachefile:
   218         try:
   262         try:
   219             for line in cachelines:
   263             for line in cachelines:
   220                 if line == "\n":
   264                 if line == "\n":
   221                     break
   265                     break
   299     # Caller has to iterate over all heads, but can use the filenodes in
   343     # Caller has to iterate over all heads, but can use the filenodes in
   300     # cachefnode to get to each .hgtags revision quickly.
   344     # cachefnode to get to each .hgtags revision quickly.
   301     return (repoheads, cachefnode, None, True)
   345     return (repoheads, cachefnode, None, True)
   302 
   346 
   303 def _writetagcache(ui, repo, heads, tagfnode, cachetags):
   347 def _writetagcache(ui, repo, heads, tagfnode, cachetags):
   304 
       
   305     try:
   348     try:
   306         cachefile = repo.vfs('cache/tags', 'w', atomictemp=True)
   349         cachefile = repo.vfs('cache/tags', 'w', atomictemp=True)
   307     except (OSError, IOError):
   350     except (OSError, IOError):
   308         return
   351         return
   309 
   352