hgext/narrow/narrowchangegroup.py
branchstable
changeset 40404 956ec6f1320d
parent 40131 535fc8a22365
parent 40403 bf249bb60087
child 40405 4185bc53d1e3
equal deleted inserted replaced
40131:535fc8a22365 40404:956ec6f1320d
     1 # narrowchangegroup.py - narrow clone changegroup creation and consumption
       
     2 #
       
     3 # Copyright 2017 Google, Inc.
       
     4 #
       
     5 # This software may be used and distributed according to the terms of the
       
     6 # GNU General Public License version 2 or any later version.
       
     7 
       
     8 from __future__ import absolute_import
       
     9 
       
    10 from mercurial.i18n import _
       
    11 from mercurial import (
       
    12     changegroup,
       
    13     error,
       
    14     extensions,
       
    15     manifest,
       
    16     match as matchmod,
       
    17     mdiff,
       
    18     node,
       
    19     revlog,
       
    20     util,
       
    21 )
       
    22 
       
    23 def setup():
       
    24 
       
    25     def _cgmatcher(cgpacker):
       
    26         localmatcher = cgpacker._repo.narrowmatch()
       
    27         remotematcher = getattr(cgpacker, '_narrow_matcher', lambda: None)()
       
    28         if remotematcher:
       
    29             return matchmod.intersectmatchers(localmatcher, remotematcher)
       
    30         else:
       
    31             return localmatcher
       
    32 
       
    33     def prune(orig, self, revlog, missing, commonrevs):
       
    34         if isinstance(revlog, manifest.manifestrevlog):
       
    35             matcher = _cgmatcher(self)
       
    36             if (matcher and
       
    37                 not matcher.visitdir(revlog._dir[:-1] or '.')):
       
    38                 return []
       
    39         return orig(self, revlog, missing, commonrevs)
       
    40 
       
    41     extensions.wrapfunction(changegroup.cg1packer, 'prune', prune)
       
    42 
       
    43     def generatefiles(orig, self, changedfiles, linknodes, commonrevs,
       
    44                       source):
       
    45         matcher = _cgmatcher(self)
       
    46         if matcher:
       
    47             changedfiles = list(filter(matcher, changedfiles))
       
    48         if getattr(self, 'is_shallow', False):
       
    49             # See comment in generate() for why this sadness is a thing.
       
    50             mfdicts = self._mfdicts
       
    51             del self._mfdicts
       
    52             # In a shallow clone, the linknodes callback needs to also include
       
    53             # those file nodes that are in the manifests we sent but weren't
       
    54             # introduced by those manifests.
       
    55             commonctxs = [self._repo[c] for c in commonrevs]
       
    56             oldlinknodes = linknodes
       
    57             clrev = self._repo.changelog.rev
       
    58             def linknodes(flog, fname):
       
    59                 for c in commonctxs:
       
    60                     try:
       
    61                         fnode = c.filenode(fname)
       
    62                         self.clrev_to_localrev[c.rev()] = flog.rev(fnode)
       
    63                     except error.ManifestLookupError:
       
    64                         pass
       
    65                 links = oldlinknodes(flog, fname)
       
    66                 if len(links) != len(mfdicts):
       
    67                     for mf, lr in mfdicts:
       
    68                         fnode = mf.get(fname, None)
       
    69                         if fnode in links:
       
    70                             links[fnode] = min(links[fnode], lr, key=clrev)
       
    71                         elif fnode:
       
    72                             links[fnode] = lr
       
    73                 return links
       
    74         return orig(self, changedfiles, linknodes, commonrevs, source)
       
    75     extensions.wrapfunction(
       
    76         changegroup.cg1packer, 'generatefiles', generatefiles)
       
    77 
       
    78     def ellipsisdata(packer, rev, revlog_, p1, p2, data, linknode):
       
    79         n = revlog_.node(rev)
       
    80         p1n, p2n = revlog_.node(p1), revlog_.node(p2)
       
    81         flags = revlog_.flags(rev)
       
    82         flags |= revlog.REVIDX_ELLIPSIS
       
    83         meta = packer.builddeltaheader(
       
    84             n, p1n, p2n, node.nullid, linknode, flags)
       
    85         # TODO: try and actually send deltas for ellipsis data blocks
       
    86         diffheader = mdiff.trivialdiffheader(len(data))
       
    87         l = len(meta) + len(diffheader) + len(data)
       
    88         return ''.join((changegroup.chunkheader(l),
       
    89                         meta,
       
    90                         diffheader,
       
    91                         data))
       
    92 
       
    93     def close(orig, self):
       
    94         getattr(self, 'clrev_to_localrev', {}).clear()
       
    95         if getattr(self, 'next_clrev_to_localrev', {}):
       
    96             self.clrev_to_localrev = self.next_clrev_to_localrev
       
    97             del self.next_clrev_to_localrev
       
    98         self.changelog_done = True
       
    99         return orig(self)
       
   100     extensions.wrapfunction(changegroup.cg1packer, 'close', close)
       
   101 
       
   102     # In a perfect world, we'd generate better ellipsis-ified graphs
       
   103     # for non-changelog revlogs. In practice, we haven't started doing
       
   104     # that yet, so the resulting DAGs for the manifestlog and filelogs
       
   105     # are actually full of bogus parentage on all the ellipsis
       
   106     # nodes. This has the side effect that, while the contents are
       
   107     # correct, the individual DAGs might be completely out of whack in
       
   108     # a case like 882681bc3166 and its ancestors (back about 10
       
   109     # revisions or so) in the main hg repo.
       
   110     #
       
   111     # The one invariant we *know* holds is that the new (potentially
       
   112     # bogus) DAG shape will be valid if we order the nodes in the
       
   113     # order that they're introduced in dramatis personae by the
       
   114     # changelog, so what we do is we sort the non-changelog histories
       
   115     # by the order in which they are used by the changelog.
       
   116     def _sortgroup(orig, self, revlog, nodelist, lookup):
       
   117         if not util.safehasattr(self, 'full_nodes') or not self.clnode_to_rev:
       
   118             return orig(self, revlog, nodelist, lookup)
       
   119         key = lambda n: self.clnode_to_rev[lookup(n)]
       
   120         return [revlog.rev(n) for n in sorted(nodelist, key=key)]
       
   121 
       
   122     extensions.wrapfunction(changegroup.cg1packer, '_sortgroup', _sortgroup)
       
   123 
       
   124     def generate(orig, self, commonrevs, clnodes, fastpathlinkrev, source):
       
   125         '''yield a sequence of changegroup chunks (strings)'''
       
   126         # Note: other than delegating to orig, the only deviation in
       
   127         # logic from normal hg's generate is marked with BEGIN/END
       
   128         # NARROW HACK.
       
   129         if not util.safehasattr(self, 'full_nodes'):
       
   130             # not sending a narrow bundle
       
   131             for x in orig(self, commonrevs, clnodes, fastpathlinkrev, source):
       
   132                 yield x
       
   133             return
       
   134 
       
   135         repo = self._repo
       
   136         cl = repo.changelog
       
   137         mfl = repo.manifestlog
       
   138         mfrevlog = mfl._revlog
       
   139 
       
   140         clrevorder = {}
       
   141         mfs = {} # needed manifests
       
   142         fnodes = {} # needed file nodes
       
   143         changedfiles = set()
       
   144 
       
   145         # Callback for the changelog, used to collect changed files and manifest
       
   146         # nodes.
       
   147         # Returns the linkrev node (identity in the changelog case).
       
   148         def lookupcl(x):
       
   149             c = cl.read(x)
       
   150             clrevorder[x] = len(clrevorder)
       
   151             # BEGIN NARROW HACK
       
   152             #
       
   153             # Only update mfs if x is going to be sent. Otherwise we
       
   154             # end up with bogus linkrevs specified for manifests and
       
   155             # we skip some manifest nodes that we should otherwise
       
   156             # have sent.
       
   157             if x in self.full_nodes or cl.rev(x) in self.precomputed_ellipsis:
       
   158                 n = c[0]
       
   159                 # record the first changeset introducing this manifest version
       
   160                 mfs.setdefault(n, x)
       
   161                 # Set this narrow-specific dict so we have the lowest manifest
       
   162                 # revnum to look up for this cl revnum. (Part of mapping
       
   163                 # changelog ellipsis parents to manifest ellipsis parents)
       
   164                 self.next_clrev_to_localrev.setdefault(cl.rev(x),
       
   165                                                        mfrevlog.rev(n))
       
   166             # We can't trust the changed files list in the changeset if the
       
   167             # client requested a shallow clone.
       
   168             if self.is_shallow:
       
   169                 changedfiles.update(mfl[c[0]].read().keys())
       
   170             else:
       
   171                 changedfiles.update(c[3])
       
   172             # END NARROW HACK
       
   173             # Record a complete list of potentially-changed files in
       
   174             # this manifest.
       
   175             return x
       
   176 
       
   177         self._verbosenote(_('uncompressed size of bundle content:\n'))
       
   178         size = 0
       
   179         for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
       
   180             size += len(chunk)
       
   181             yield chunk
       
   182         self._verbosenote(_('%8.i (changelog)\n') % size)
       
   183 
       
   184         # We need to make sure that the linkrev in the changegroup refers to
       
   185         # the first changeset that introduced the manifest or file revision.
       
   186         # The fastpath is usually safer than the slowpath, because the filelogs
       
   187         # are walked in revlog order.
       
   188         #
       
   189         # When taking the slowpath with reorder=None and the manifest revlog
       
   190         # uses generaldelta, the manifest may be walked in the "wrong" order.
       
   191         # Without 'clrevorder', we would get an incorrect linkrev (see fix in
       
   192         # cc0ff93d0c0c).
       
   193         #
       
   194         # When taking the fastpath, we are only vulnerable to reordering
       
   195         # of the changelog itself. The changelog never uses generaldelta, so
       
   196         # it is only reordered when reorder=True. To handle this case, we
       
   197         # simply take the slowpath, which already has the 'clrevorder' logic.
       
   198         # This was also fixed in cc0ff93d0c0c.
       
   199         fastpathlinkrev = fastpathlinkrev and not self._reorder
       
   200         # Treemanifests don't work correctly with fastpathlinkrev
       
   201         # either, because we don't discover which directory nodes to
       
   202         # send along with files. This could probably be fixed.
       
   203         fastpathlinkrev = fastpathlinkrev and (
       
   204             'treemanifest' not in repo.requirements)
       
   205         # Shallow clones also don't work correctly with fastpathlinkrev
       
   206         # because file nodes may need to be sent for a manifest even if they
       
   207         # weren't introduced by that manifest.
       
   208         fastpathlinkrev = fastpathlinkrev and not self.is_shallow
       
   209 
       
   210         for chunk in self.generatemanifests(commonrevs, clrevorder,
       
   211                 fastpathlinkrev, mfs, fnodes, source):
       
   212             yield chunk
       
   213         # BEGIN NARROW HACK
       
   214         mfdicts = None
       
   215         if self.is_shallow:
       
   216             mfdicts = [(self._repo.manifestlog[n].read(), lr)
       
   217                        for (n, lr) in mfs.iteritems()]
       
   218         # END NARROW HACK
       
   219         mfs.clear()
       
   220         clrevs = set(cl.rev(x) for x in clnodes)
       
   221 
       
   222         if not fastpathlinkrev:
       
   223             def linknodes(unused, fname):
       
   224                 return fnodes.get(fname, {})
       
   225         else:
       
   226             cln = cl.node
       
   227             def linknodes(filerevlog, fname):
       
   228                 llr = filerevlog.linkrev
       
   229                 fln = filerevlog.node
       
   230                 revs = ((r, llr(r)) for r in filerevlog)
       
   231                 return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)
       
   232 
       
   233         # BEGIN NARROW HACK
       
   234         #
       
   235         # We need to pass the mfdicts variable down into
       
   236         # generatefiles(), but more than one command might have
       
   237         # wrapped generatefiles so we can't modify the function
       
   238         # signature. Instead, we pass the data to ourselves using an
       
   239         # instance attribute. I'm sorry.
       
   240         self._mfdicts = mfdicts
       
   241         # END NARROW HACK
       
   242         for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
       
   243                                         source):
       
   244             yield chunk
       
   245 
       
   246         yield self.close()
       
   247 
       
   248         if clnodes:
       
   249             repo.hook('outgoing', node=node.hex(clnodes[0]), source=source)
       
   250     extensions.wrapfunction(changegroup.cg1packer, 'generate', generate)
       
   251 
       
   252     def revchunk(orig, self, revlog, rev, prev, linknode):
       
   253         if not util.safehasattr(self, 'full_nodes'):
       
   254             # not sending a narrow changegroup
       
   255             for x in orig(self, revlog, rev, prev, linknode):
       
   256                 yield x
       
   257             return
       
   258         # build up some mapping information that's useful later. See
       
   259         # the local() nested function below.
       
   260         if not self.changelog_done:
       
   261             self.clnode_to_rev[linknode] = rev
       
   262             linkrev = rev
       
   263             self.clrev_to_localrev[linkrev] = rev
       
   264         else:
       
   265             linkrev = self.clnode_to_rev[linknode]
       
   266             self.clrev_to_localrev[linkrev] = rev
       
   267         # This is a node to send in full, because the changeset it
       
   268         # corresponds to was a full changeset.
       
   269         if linknode in self.full_nodes:
       
   270             for x in orig(self, revlog, rev, prev, linknode):
       
   271                 yield x
       
   272             return
       
   273         # At this point, a node can either be one we should skip or an
       
   274         # ellipsis. If it's not an ellipsis, bail immediately.
       
   275         if linkrev not in self.precomputed_ellipsis:
       
   276             return
       
   277         linkparents = self.precomputed_ellipsis[linkrev]
       
   278         def local(clrev):
       
   279             """Turn a changelog revnum into a local revnum.
       
   280 
       
   281             The ellipsis dag is stored as revnums on the changelog,
       
   282             but when we're producing ellipsis entries for
       
   283             non-changelog revlogs, we need to turn those numbers into
       
   284             something local. This does that for us, and during the
       
   285             changelog sending phase will also expand the stored
       
   286             mappings as needed.
       
   287             """
       
   288             if clrev == node.nullrev:
       
   289                 return node.nullrev
       
   290             if not self.changelog_done:
       
   291                 # If we're doing the changelog, it's possible that we
       
   292                 # have a parent that is already on the client, and we
       
   293                 # need to store some extra mapping information so that
       
   294                 # our contained ellipsis nodes will be able to resolve
       
   295                 # their parents.
       
   296                 if clrev not in self.clrev_to_localrev:
       
   297                     clnode = revlog.node(clrev)
       
   298                     self.clnode_to_rev[clnode] = clrev
       
   299                 return clrev
       
   300             # Walk the ellipsis-ized changelog breadth-first looking for a
       
   301             # change that has been linked from the current revlog.
       
   302             #
       
   303             # For a flat manifest revlog only a single step should be necessary
       
   304             # as all relevant changelog entries are relevant to the flat
       
   305             # manifest.
       
   306             #
       
   307             # For a filelog or tree manifest dirlog however not every changelog
       
   308             # entry will have been relevant, so we need to skip some changelog
       
   309             # nodes even after ellipsis-izing.
       
   310             walk = [clrev]
       
   311             while walk:
       
   312                 p = walk[0]
       
   313                 walk = walk[1:]
       
   314                 if p in self.clrev_to_localrev:
       
   315                     return self.clrev_to_localrev[p]
       
   316                 elif p in self.full_nodes:
       
   317                     walk.extend([pp for pp in self._repo.changelog.parentrevs(p)
       
   318                                     if pp != node.nullrev])
       
   319                 elif p in self.precomputed_ellipsis:
       
   320                     walk.extend([pp for pp in self.precomputed_ellipsis[p]
       
   321                                     if pp != node.nullrev])
       
   322                 else:
       
   323                     # In this case, we've got an ellipsis with parents
       
   324                     # outside the current bundle (likely an
       
   325                     # incremental pull). We "know" that we can use the
       
   326                     # value of this same revlog at whatever revision
       
   327                     # is pointed to by linknode. "Know" is in scare
       
   328                     # quotes because I haven't done enough examination
       
   329                     # of edge cases to convince myself this is really
       
   330                     # a fact - it works for all the (admittedly
       
   331                     # thorough) cases in our testsuite, but I would be
       
   332                     # somewhat unsurprised to find a case in the wild
       
   333                     # where this breaks down a bit. That said, I don't
       
   334                     # know if it would hurt anything.
       
   335                     for i in xrange(rev, 0, -1):
       
   336                         if revlog.linkrev(i) == clrev:
       
   337                             return i
       
   338                     # We failed to resolve a parent for this node, so
       
   339                     # we crash the changegroup construction.
       
   340                     raise error.Abort(
       
   341                         'unable to resolve parent while packing %r %r'
       
   342                         ' for changeset %r' % (revlog.indexfile, rev, clrev))
       
   343             return node.nullrev
       
   344 
       
   345         if not linkparents or (
       
   346             revlog.parentrevs(rev) == (node.nullrev, node.nullrev)):
       
   347             p1, p2 = node.nullrev, node.nullrev
       
   348         elif len(linkparents) == 1:
       
   349             p1, = sorted(local(p) for p in linkparents)
       
   350             p2 = node.nullrev
       
   351         else:
       
   352             p1, p2 = sorted(local(p) for p in linkparents)
       
   353         n = revlog.node(rev)
       
   354         yield ellipsisdata(
       
   355             self, rev, revlog, p1, p2, revlog.revision(n), linknode)
       
   356     extensions.wrapfunction(changegroup.cg1packer, 'revchunk', revchunk)
       
   357 
       
   358     def deltaparent(orig, self, revlog, rev, p1, p2, prev):
       
   359         if util.safehasattr(self, 'full_nodes'):
       
   360             # TODO: send better deltas when in narrow mode.
       
   361             #
       
   362             # changegroup.group() loops over revisions to send,
       
   363             # including revisions we'll skip. What this means is that
       
   364             # `prev` will be a potentially useless delta base for all
       
   365             # ellipsis nodes, as the client likely won't have it. In
       
   366             # the future we should do bookkeeping about which nodes
       
   367             # have been sent to the client, and try to be
       
   368             # significantly smarter about delta bases. This is
       
   369             # slightly tricky because this same code has to work for
       
   370             # all revlogs, and we don't have the linkrev/linknode here.
       
   371             return p1
       
   372         return orig(self, revlog, rev, p1, p2, prev)
       
   373     extensions.wrapfunction(changegroup.cg2packer, 'deltaparent', deltaparent)