--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext/narrow/narrowchangegroup.py Mon Jan 29 16:19:33 2018 -0500
@@ -0,0 +1,385 @@
+# narrowchangegroup.py - narrow clone changegroup creation and consumption
+#
+# Copyright 2017 Google, Inc.
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+from mercurial.i18n import _
+from mercurial import (
+ changegroup,
+ error,
+ extensions,
+ manifest,
+ mdiff,
+ node,
+ util,
+)
+
+from . import (
+ narrowrepo,
+ narrowrevlog,
+)
+
+def setup():
+
+ def supportedoutgoingversions(orig, repo):
+ versions = orig(repo)
+ if narrowrepo.requirement in repo.requirements:
+ versions.discard('01')
+ versions.discard('02')
+ return versions
+
+ extensions.wrapfunction(changegroup, 'supportedoutgoingversions',
+ supportedoutgoingversions)
+
+ def prune(orig, self, revlog, missing, commonrevs):
+ if isinstance(revlog, manifest.manifestrevlog):
+ matcher = getattr(self._repo, 'narrowmatch',
+ getattr(self, '_narrow_matcher', None))
+ if (matcher is not None and
+ not matcher().visitdir(revlog._dir[:-1] or '.')):
+ return []
+ return orig(self, revlog, missing, commonrevs)
+
+ extensions.wrapfunction(changegroup.cg1packer, 'prune', prune)
+
+ def generatefiles(orig, self, changedfiles, linknodes, commonrevs,
+ source):
+ matcher = getattr(self._repo, 'narrowmatch',
+ getattr(self, '_narrow_matcher', None))
+ if matcher is not None:
+ narrowmatch = matcher()
+ changedfiles = filter(narrowmatch, changedfiles)
+ if getattr(self, 'is_shallow', False):
+ # See comment in generate() for why this sadness is a thing.
+ mfdicts = self._mfdicts
+ del self._mfdicts
+ # In a shallow clone, the linknodes callback needs to also include
+ # those file nodes that are in the manifests we sent but weren't
+ # introduced by those manifests.
+ commonctxs = [self._repo[c] for c in commonrevs]
+ oldlinknodes = linknodes
+ clrev = self._repo.changelog.rev
+ def linknodes(flog, fname):
+ for c in commonctxs:
+ try:
+ fnode = c.filenode(fname)
+ self.clrev_to_localrev[c.rev()] = flog.rev(fnode)
+ except error.ManifestLookupError:
+ pass
+ links = oldlinknodes(flog, fname)
+ if len(links) != len(mfdicts):
+ for mf, lr in mfdicts:
+ fnode = mf.get(fname, None)
+ if fnode in links:
+ links[fnode] = min(links[fnode], lr, key=clrev)
+ elif fnode:
+ links[fnode] = lr
+ return links
+ return orig(self, changedfiles, linknodes, commonrevs, source)
+ extensions.wrapfunction(
+ changegroup.cg1packer, 'generatefiles', generatefiles)
+
+ def ellipsisdata(packer, rev, revlog, p1, p2, data, linknode):
+ n = revlog.node(rev)
+ p1n, p2n = revlog.node(p1), revlog.node(p2)
+ flags = revlog.flags(rev)
+ flags |= narrowrevlog.ELLIPSIS_NODE_FLAG
+ meta = packer.builddeltaheader(
+ n, p1n, p2n, node.nullid, linknode, flags)
+ # TODO: try and actually send deltas for ellipsis data blocks
+ diffheader = mdiff.trivialdiffheader(len(data))
+ l = len(meta) + len(diffheader) + len(data)
+ return ''.join((changegroup.chunkheader(l),
+ meta,
+ diffheader,
+ data))
+
+ def close(orig, self):
+ getattr(self, 'clrev_to_localrev', {}).clear()
+ if getattr(self, 'next_clrev_to_localrev', {}):
+ self.clrev_to_localrev = self.next_clrev_to_localrev
+ del self.next_clrev_to_localrev
+ self.changelog_done = True
+ return orig(self)
+ extensions.wrapfunction(changegroup.cg1packer, 'close', close)
+
+ # In a perfect world, we'd generate better ellipsis-ified graphs
+ # for non-changelog revlogs. In practice, we haven't started doing
+ # that yet, so the resulting DAGs for the manifestlog and filelogs
+ # are actually full of bogus parentage on all the ellipsis
+ # nodes. This has the side effect that, while the contents are
+ # correct, the individual DAGs might be completely out of whack in
+ # a case like 882681bc3166 and its ancestors (back about 10
+ # revisions or so) in the main hg repo.
+ #
+ # The one invariant we *know* holds is that the new (potentially
+ # bogus) DAG shape will be valid if we order the nodes in the
+ # order that they're introduced in dramatis personae by the
+ # changelog, so what we do is we sort the non-changelog histories
+ # by the order in which they are used by the changelog.
+ def _sortgroup(orig, self, revlog, nodelist, lookup):
+ if not util.safehasattr(self, 'full_nodes') or not self.clnode_to_rev:
+ return orig(self, revlog, nodelist, lookup)
+ key = lambda n: self.clnode_to_rev[lookup(n)]
+ return [revlog.rev(n) for n in sorted(nodelist, key=key)]
+
+ extensions.wrapfunction(changegroup.cg1packer, '_sortgroup', _sortgroup)
+
+ def generate(orig, self, commonrevs, clnodes, fastpathlinkrev, source):
+ '''yield a sequence of changegroup chunks (strings)'''
+ # Note: other than delegating to orig, the only deviation in
+ # logic from normal hg's generate is marked with BEGIN/END
+ # NARROW HACK.
+ if not util.safehasattr(self, 'full_nodes'):
+ # not sending a narrow bundle
+ for x in orig(self, commonrevs, clnodes, fastpathlinkrev, source):
+ yield x
+ return
+
+ repo = self._repo
+ cl = repo.changelog
+ mfl = repo.manifestlog
+ mfrevlog = mfl._revlog
+
+ clrevorder = {}
+ mfs = {} # needed manifests
+ fnodes = {} # needed file nodes
+ changedfiles = set()
+
+ # Callback for the changelog, used to collect changed files and manifest
+ # nodes.
+ # Returns the linkrev node (identity in the changelog case).
+ def lookupcl(x):
+ c = cl.read(x)
+ clrevorder[x] = len(clrevorder)
+ # BEGIN NARROW HACK
+ #
+ # Only update mfs if x is going to be sent. Otherwise we
+ # end up with bogus linkrevs specified for manifests and
+ # we skip some manifest nodes that we should otherwise
+ # have sent.
+ if x in self.full_nodes or cl.rev(x) in self.precomputed_ellipsis:
+ n = c[0]
+ # record the first changeset introducing this manifest version
+ mfs.setdefault(n, x)
+ # Set this narrow-specific dict so we have the lowest manifest
+ # revnum to look up for this cl revnum. (Part of mapping
+ # changelog ellipsis parents to manifest ellipsis parents)
+ self.next_clrev_to_localrev.setdefault(cl.rev(x),
+ mfrevlog.rev(n))
+ # We can't trust the changed files list in the changeset if the
+ # client requested a shallow clone.
+ if self.is_shallow:
+ changedfiles.update(mfl[c[0]].read().keys())
+ else:
+ changedfiles.update(c[3])
+ # END NARROW HACK
+ # Record a complete list of potentially-changed files in
+ # this manifest.
+ return x
+
+ self._verbosenote(_('uncompressed size of bundle content:\n'))
+ size = 0
+ for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
+ size += len(chunk)
+ yield chunk
+ self._verbosenote(_('%8.i (changelog)\n') % size)
+
+ # We need to make sure that the linkrev in the changegroup refers to
+ # the first changeset that introduced the manifest or file revision.
+ # The fastpath is usually safer than the slowpath, because the filelogs
+ # are walked in revlog order.
+ #
+ # When taking the slowpath with reorder=None and the manifest revlog
+ # uses generaldelta, the manifest may be walked in the "wrong" order.
+ # Without 'clrevorder', we would get an incorrect linkrev (see fix in
+ # cc0ff93d0c0c).
+ #
+ # When taking the fastpath, we are only vulnerable to reordering
+ # of the changelog itself. The changelog never uses generaldelta, so
+ # it is only reordered when reorder=True. To handle this case, we
+ # simply take the slowpath, which already has the 'clrevorder' logic.
+ # This was also fixed in cc0ff93d0c0c.
+ fastpathlinkrev = fastpathlinkrev and not self._reorder
+ # Treemanifests don't work correctly with fastpathlinkrev
+ # either, because we don't discover which directory nodes to
+ # send along with files. This could probably be fixed.
+ fastpathlinkrev = fastpathlinkrev and (
+ 'treemanifest' not in repo.requirements)
+ # Shallow clones also don't work correctly with fastpathlinkrev
+ # because file nodes may need to be sent for a manifest even if they
+ # weren't introduced by that manifest.
+ fastpathlinkrev = fastpathlinkrev and not self.is_shallow
+
+ moreargs = []
+ if self.generatemanifests.func_code.co_argcount == 7:
+ # The source argument was added to generatemanifests in hg in
+ # 75cc1f1e11f2 (2017/09/11).
+ moreargs.append(source)
+ for chunk in self.generatemanifests(commonrevs, clrevorder,
+ fastpathlinkrev, mfs, fnodes, *moreargs):
+ yield chunk
+ # BEGIN NARROW HACK
+ mfdicts = None
+ if self.is_shallow:
+ mfdicts = [(self._repo.manifestlog[n].read(), lr)
+ for (n, lr) in mfs.iteritems()]
+ # END NARROW HACK
+ mfs.clear()
+ clrevs = set(cl.rev(x) for x in clnodes)
+
+ if not fastpathlinkrev:
+ def linknodes(unused, fname):
+ return fnodes.get(fname, {})
+ else:
+ cln = cl.node
+ def linknodes(filerevlog, fname):
+ llr = filerevlog.linkrev
+ fln = filerevlog.node
+ revs = ((r, llr(r)) for r in filerevlog)
+ return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)
+
+ # BEGIN NARROW HACK
+ #
+ # We need to pass the mfdicts variable down into
+ # generatefiles(), but more than one command might have
+ # wrapped generatefiles so we can't modify the function
+ # signature. Instead, we pass the data to ourselves using an
+ # instance attribute. I'm sorry.
+ self._mfdicts = mfdicts
+ # END NARROW HACK
+ for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
+ source):
+ yield chunk
+
+ yield self.close()
+
+ if clnodes:
+ repo.hook('outgoing', node=node.hex(clnodes[0]), source=source)
+ extensions.wrapfunction(changegroup.cg1packer, 'generate', generate)
+
+ def revchunk(orig, self, revlog, rev, prev, linknode):
+ if not util.safehasattr(self, 'full_nodes'):
+ # not sending a narrow changegroup
+ for x in orig(self, revlog, rev, prev, linknode):
+ yield x
+ return
+ # build up some mapping information that's useful later. See
+ # the local() nested function below.
+ if not self.changelog_done:
+ self.clnode_to_rev[linknode] = rev
+ linkrev = rev
+ self.clrev_to_localrev[linkrev] = rev
+ else:
+ linkrev = self.clnode_to_rev[linknode]
+ self.clrev_to_localrev[linkrev] = rev
+ # This is a node to send in full, because the changeset it
+ # corresponds to was a full changeset.
+ if linknode in self.full_nodes:
+ for x in orig(self, revlog, rev, prev, linknode):
+ yield x
+ return
+ # At this point, a node can either be one we should skip or an
+ # ellipsis. If it's not an ellipsis, bail immediately.
+ if linkrev not in self.precomputed_ellipsis:
+ return
+ linkparents = self.precomputed_ellipsis[linkrev]
+ def local(clrev):
+ """Turn a changelog revnum into a local revnum.
+
+ The ellipsis dag is stored as revnums on the changelog,
+ but when we're producing ellipsis entries for
+ non-changelog revlogs, we need to turn those numbers into
+ something local. This does that for us, and during the
+ changelog sending phase will also expand the stored
+ mappings as needed.
+ """
+ if clrev == node.nullrev:
+ return node.nullrev
+ if not self.changelog_done:
+ # If we're doing the changelog, it's possible that we
+ # have a parent that is already on the client, and we
+ # need to store some extra mapping information so that
+ # our contained ellipsis nodes will be able to resolve
+ # their parents.
+ if clrev not in self.clrev_to_localrev:
+ clnode = revlog.node(clrev)
+ self.clnode_to_rev[clnode] = clrev
+ return clrev
+ # Walk the ellipsis-ized changelog breadth-first looking for a
+ # change that has been linked from the current revlog.
+ #
+ # For a flat manifest revlog only a single step should be necessary
+ # as all relevant changelog entries are relevant to the flat
+ # manifest.
+ #
+ # For a filelog or tree manifest dirlog however not every changelog
+ # entry will have been relevant, so we need to skip some changelog
+ # nodes even after ellipsis-izing.
+ walk = [clrev]
+ while walk:
+ p = walk[0]
+ walk = walk[1:]
+ if p in self.clrev_to_localrev:
+ return self.clrev_to_localrev[p]
+ elif p in self.full_nodes:
+ walk.extend([pp for pp in self._repo.changelog.parentrevs(p)
+ if pp != node.nullrev])
+ elif p in self.precomputed_ellipsis:
+ walk.extend([pp for pp in self.precomputed_ellipsis[p]
+ if pp != node.nullrev])
+ else:
+ # In this case, we've got an ellipsis with parents
+ # outside the current bundle (likely an
+ # incremental pull). We "know" that we can use the
+ # value of this same revlog at whatever revision
+ # is pointed to by linknode. "Know" is in scare
+ # quotes because I haven't done enough examination
+ # of edge cases to convince myself this is really
+ # a fact - it works for all the (admittedly
+ # thorough) cases in our testsuite, but I would be
+ # somewhat unsurprised to find a case in the wild
+ # where this breaks down a bit. That said, I don't
+ # know if it would hurt anything.
+ for i in xrange(rev, 0, -1):
+ if revlog.linkrev(i) == clrev:
+ return i
+ # We failed to resolve a parent for this node, so
+ # we crash the changegroup construction.
+ raise error.Abort(
+ 'unable to resolve parent while packing %r %r'
+ ' for changeset %r' % (revlog.indexfile, rev, clrev))
+ return node.nullrev
+
+ if not linkparents or (
+ revlog.parentrevs(rev) == (node.nullrev, node.nullrev)):
+ p1, p2 = node.nullrev, node.nullrev
+ elif len(linkparents) == 1:
+ p1, = sorted(local(p) for p in linkparents)
+ p2 = node.nullrev
+ else:
+ p1, p2 = sorted(local(p) for p in linkparents)
+ yield ellipsisdata(
+ self, rev, revlog, p1, p2, revlog.revision(rev), linknode)
+ extensions.wrapfunction(changegroup.cg1packer, 'revchunk', revchunk)
+
+ def deltaparent(orig, self, revlog, rev, p1, p2, prev):
+ if util.safehasattr(self, 'full_nodes'):
+ # TODO: send better deltas when in narrow mode.
+ #
+ # changegroup.group() loops over revisions to send,
+ # including revisions we'll skip. What this means is that
+ # `prev` will be a potentially useless delta base for all
+ # ellipsis nodes, as the client likely won't have it. In
+ # the future we should do bookkeeping about which nodes
+ # have been sent to the client, and try to be
+ # significantly smarter about delta bases. This is
+ # slightly tricky because this same code has to work for
+ # all revlogs, and we don't have the linkrev/linknode here.
+ return p1
+ return orig(self, revlog, rev, p1, p2, prev)
+ extensions.wrapfunction(changegroup.cg2packer, 'deltaparent', deltaparent)