hgext/narrow/narrowchangegroup.py
branchstable
changeset 40404 956ec6f1320d
parent 40131 535fc8a22365
parent 40403 bf249bb60087
child 40405 4185bc53d1e3
--- a/hgext/narrow/narrowchangegroup.py	Wed Oct 10 12:25:28 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,373 +0,0 @@
-# narrowchangegroup.py - narrow clone changegroup creation and consumption
-#
-# Copyright 2017 Google, Inc.
-#
-# This software may be used and distributed according to the terms of the
-# GNU General Public License version 2 or any later version.
-
-from __future__ import absolute_import
-
-from mercurial.i18n import _
-from mercurial import (
-    changegroup,
-    error,
-    extensions,
-    manifest,
-    match as matchmod,
-    mdiff,
-    node,
-    revlog,
-    util,
-)
-
-def setup():
-
-    def _cgmatcher(cgpacker):
-        localmatcher = cgpacker._repo.narrowmatch()
-        remotematcher = getattr(cgpacker, '_narrow_matcher', lambda: None)()
-        if remotematcher:
-            return matchmod.intersectmatchers(localmatcher, remotematcher)
-        else:
-            return localmatcher
-
-    def prune(orig, self, revlog, missing, commonrevs):
-        if isinstance(revlog, manifest.manifestrevlog):
-            matcher = _cgmatcher(self)
-            if (matcher and
-                not matcher.visitdir(revlog._dir[:-1] or '.')):
-                return []
-        return orig(self, revlog, missing, commonrevs)
-
-    extensions.wrapfunction(changegroup.cg1packer, 'prune', prune)
-
-    def generatefiles(orig, self, changedfiles, linknodes, commonrevs,
-                      source):
-        matcher = _cgmatcher(self)
-        if matcher:
-            changedfiles = list(filter(matcher, changedfiles))
-        if getattr(self, 'is_shallow', False):
-            # See comment in generate() for why this sadness is a thing.
-            mfdicts = self._mfdicts
-            del self._mfdicts
-            # In a shallow clone, the linknodes callback needs to also include
-            # those file nodes that are in the manifests we sent but weren't
-            # introduced by those manifests.
-            commonctxs = [self._repo[c] for c in commonrevs]
-            oldlinknodes = linknodes
-            clrev = self._repo.changelog.rev
-            def linknodes(flog, fname):
-                for c in commonctxs:
-                    try:
-                        fnode = c.filenode(fname)
-                        self.clrev_to_localrev[c.rev()] = flog.rev(fnode)
-                    except error.ManifestLookupError:
-                        pass
-                links = oldlinknodes(flog, fname)
-                if len(links) != len(mfdicts):
-                    for mf, lr in mfdicts:
-                        fnode = mf.get(fname, None)
-                        if fnode in links:
-                            links[fnode] = min(links[fnode], lr, key=clrev)
-                        elif fnode:
-                            links[fnode] = lr
-                return links
-        return orig(self, changedfiles, linknodes, commonrevs, source)
-    extensions.wrapfunction(
-        changegroup.cg1packer, 'generatefiles', generatefiles)
-
-    def ellipsisdata(packer, rev, revlog_, p1, p2, data, linknode):
-        n = revlog_.node(rev)
-        p1n, p2n = revlog_.node(p1), revlog_.node(p2)
-        flags = revlog_.flags(rev)
-        flags |= revlog.REVIDX_ELLIPSIS
-        meta = packer.builddeltaheader(
-            n, p1n, p2n, node.nullid, linknode, flags)
-        # TODO: try and actually send deltas for ellipsis data blocks
-        diffheader = mdiff.trivialdiffheader(len(data))
-        l = len(meta) + len(diffheader) + len(data)
-        return ''.join((changegroup.chunkheader(l),
-                        meta,
-                        diffheader,
-                        data))
-
-    def close(orig, self):
-        getattr(self, 'clrev_to_localrev', {}).clear()
-        if getattr(self, 'next_clrev_to_localrev', {}):
-            self.clrev_to_localrev = self.next_clrev_to_localrev
-            del self.next_clrev_to_localrev
-        self.changelog_done = True
-        return orig(self)
-    extensions.wrapfunction(changegroup.cg1packer, 'close', close)
-
-    # In a perfect world, we'd generate better ellipsis-ified graphs
-    # for non-changelog revlogs. In practice, we haven't started doing
-    # that yet, so the resulting DAGs for the manifestlog and filelogs
-    # are actually full of bogus parentage on all the ellipsis
-    # nodes. This has the side effect that, while the contents are
-    # correct, the individual DAGs might be completely out of whack in
-    # a case like 882681bc3166 and its ancestors (back about 10
-    # revisions or so) in the main hg repo.
-    #
-    # The one invariant we *know* holds is that the new (potentially
-    # bogus) DAG shape will be valid if we order the nodes in the
-    # order that they're introduced in dramatis personae by the
-    # changelog, so what we do is we sort the non-changelog histories
-    # by the order in which they are used by the changelog.
-    def _sortgroup(orig, self, revlog, nodelist, lookup):
-        if not util.safehasattr(self, 'full_nodes') or not self.clnode_to_rev:
-            return orig(self, revlog, nodelist, lookup)
-        key = lambda n: self.clnode_to_rev[lookup(n)]
-        return [revlog.rev(n) for n in sorted(nodelist, key=key)]
-
-    extensions.wrapfunction(changegroup.cg1packer, '_sortgroup', _sortgroup)
-
-    def generate(orig, self, commonrevs, clnodes, fastpathlinkrev, source):
-        '''yield a sequence of changegroup chunks (strings)'''
-        # Note: other than delegating to orig, the only deviation in
-        # logic from normal hg's generate is marked with BEGIN/END
-        # NARROW HACK.
-        if not util.safehasattr(self, 'full_nodes'):
-            # not sending a narrow bundle
-            for x in orig(self, commonrevs, clnodes, fastpathlinkrev, source):
-                yield x
-            return
-
-        repo = self._repo
-        cl = repo.changelog
-        mfl = repo.manifestlog
-        mfrevlog = mfl._revlog
-
-        clrevorder = {}
-        mfs = {} # needed manifests
-        fnodes = {} # needed file nodes
-        changedfiles = set()
-
-        # Callback for the changelog, used to collect changed files and manifest
-        # nodes.
-        # Returns the linkrev node (identity in the changelog case).
-        def lookupcl(x):
-            c = cl.read(x)
-            clrevorder[x] = len(clrevorder)
-            # BEGIN NARROW HACK
-            #
-            # Only update mfs if x is going to be sent. Otherwise we
-            # end up with bogus linkrevs specified for manifests and
-            # we skip some manifest nodes that we should otherwise
-            # have sent.
-            if x in self.full_nodes or cl.rev(x) in self.precomputed_ellipsis:
-                n = c[0]
-                # record the first changeset introducing this manifest version
-                mfs.setdefault(n, x)
-                # Set this narrow-specific dict so we have the lowest manifest
-                # revnum to look up for this cl revnum. (Part of mapping
-                # changelog ellipsis parents to manifest ellipsis parents)
-                self.next_clrev_to_localrev.setdefault(cl.rev(x),
-                                                       mfrevlog.rev(n))
-            # We can't trust the changed files list in the changeset if the
-            # client requested a shallow clone.
-            if self.is_shallow:
-                changedfiles.update(mfl[c[0]].read().keys())
-            else:
-                changedfiles.update(c[3])
-            # END NARROW HACK
-            # Record a complete list of potentially-changed files in
-            # this manifest.
-            return x
-
-        self._verbosenote(_('uncompressed size of bundle content:\n'))
-        size = 0
-        for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
-            size += len(chunk)
-            yield chunk
-        self._verbosenote(_('%8.i (changelog)\n') % size)
-
-        # We need to make sure that the linkrev in the changegroup refers to
-        # the first changeset that introduced the manifest or file revision.
-        # The fastpath is usually safer than the slowpath, because the filelogs
-        # are walked in revlog order.
-        #
-        # When taking the slowpath with reorder=None and the manifest revlog
-        # uses generaldelta, the manifest may be walked in the "wrong" order.
-        # Without 'clrevorder', we would get an incorrect linkrev (see fix in
-        # cc0ff93d0c0c).
-        #
-        # When taking the fastpath, we are only vulnerable to reordering
-        # of the changelog itself. The changelog never uses generaldelta, so
-        # it is only reordered when reorder=True. To handle this case, we
-        # simply take the slowpath, which already has the 'clrevorder' logic.
-        # This was also fixed in cc0ff93d0c0c.
-        fastpathlinkrev = fastpathlinkrev and not self._reorder
-        # Treemanifests don't work correctly with fastpathlinkrev
-        # either, because we don't discover which directory nodes to
-        # send along with files. This could probably be fixed.
-        fastpathlinkrev = fastpathlinkrev and (
-            'treemanifest' not in repo.requirements)
-        # Shallow clones also don't work correctly with fastpathlinkrev
-        # because file nodes may need to be sent for a manifest even if they
-        # weren't introduced by that manifest.
-        fastpathlinkrev = fastpathlinkrev and not self.is_shallow
-
-        for chunk in self.generatemanifests(commonrevs, clrevorder,
-                fastpathlinkrev, mfs, fnodes, source):
-            yield chunk
-        # BEGIN NARROW HACK
-        mfdicts = None
-        if self.is_shallow:
-            mfdicts = [(self._repo.manifestlog[n].read(), lr)
-                       for (n, lr) in mfs.iteritems()]
-        # END NARROW HACK
-        mfs.clear()
-        clrevs = set(cl.rev(x) for x in clnodes)
-
-        if not fastpathlinkrev:
-            def linknodes(unused, fname):
-                return fnodes.get(fname, {})
-        else:
-            cln = cl.node
-            def linknodes(filerevlog, fname):
-                llr = filerevlog.linkrev
-                fln = filerevlog.node
-                revs = ((r, llr(r)) for r in filerevlog)
-                return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs)
-
-        # BEGIN NARROW HACK
-        #
-        # We need to pass the mfdicts variable down into
-        # generatefiles(), but more than one command might have
-        # wrapped generatefiles so we can't modify the function
-        # signature. Instead, we pass the data to ourselves using an
-        # instance attribute. I'm sorry.
-        self._mfdicts = mfdicts
-        # END NARROW HACK
-        for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
-                                        source):
-            yield chunk
-
-        yield self.close()
-
-        if clnodes:
-            repo.hook('outgoing', node=node.hex(clnodes[0]), source=source)
-    extensions.wrapfunction(changegroup.cg1packer, 'generate', generate)
-
-    def revchunk(orig, self, revlog, rev, prev, linknode):
-        if not util.safehasattr(self, 'full_nodes'):
-            # not sending a narrow changegroup
-            for x in orig(self, revlog, rev, prev, linknode):
-                yield x
-            return
-        # build up some mapping information that's useful later. See
-        # the local() nested function below.
-        if not self.changelog_done:
-            self.clnode_to_rev[linknode] = rev
-            linkrev = rev
-            self.clrev_to_localrev[linkrev] = rev
-        else:
-            linkrev = self.clnode_to_rev[linknode]
-            self.clrev_to_localrev[linkrev] = rev
-        # This is a node to send in full, because the changeset it
-        # corresponds to was a full changeset.
-        if linknode in self.full_nodes:
-            for x in orig(self, revlog, rev, prev, linknode):
-                yield x
-            return
-        # At this point, a node can either be one we should skip or an
-        # ellipsis. If it's not an ellipsis, bail immediately.
-        if linkrev not in self.precomputed_ellipsis:
-            return
-        linkparents = self.precomputed_ellipsis[linkrev]
-        def local(clrev):
-            """Turn a changelog revnum into a local revnum.
-
-            The ellipsis dag is stored as revnums on the changelog,
-            but when we're producing ellipsis entries for
-            non-changelog revlogs, we need to turn those numbers into
-            something local. This does that for us, and during the
-            changelog sending phase will also expand the stored
-            mappings as needed.
-            """
-            if clrev == node.nullrev:
-                return node.nullrev
-            if not self.changelog_done:
-                # If we're doing the changelog, it's possible that we
-                # have a parent that is already on the client, and we
-                # need to store some extra mapping information so that
-                # our contained ellipsis nodes will be able to resolve
-                # their parents.
-                if clrev not in self.clrev_to_localrev:
-                    clnode = revlog.node(clrev)
-                    self.clnode_to_rev[clnode] = clrev
-                return clrev
-            # Walk the ellipsis-ized changelog breadth-first looking for a
-            # change that has been linked from the current revlog.
-            #
-            # For a flat manifest revlog only a single step should be necessary
-            # as all relevant changelog entries are relevant to the flat
-            # manifest.
-            #
-            # For a filelog or tree manifest dirlog however not every changelog
-            # entry will have been relevant, so we need to skip some changelog
-            # nodes even after ellipsis-izing.
-            walk = [clrev]
-            while walk:
-                p = walk[0]
-                walk = walk[1:]
-                if p in self.clrev_to_localrev:
-                    return self.clrev_to_localrev[p]
-                elif p in self.full_nodes:
-                    walk.extend([pp for pp in self._repo.changelog.parentrevs(p)
-                                    if pp != node.nullrev])
-                elif p in self.precomputed_ellipsis:
-                    walk.extend([pp for pp in self.precomputed_ellipsis[p]
-                                    if pp != node.nullrev])
-                else:
-                    # In this case, we've got an ellipsis with parents
-                    # outside the current bundle (likely an
-                    # incremental pull). We "know" that we can use the
-                    # value of this same revlog at whatever revision
-                    # is pointed to by linknode. "Know" is in scare
-                    # quotes because I haven't done enough examination
-                    # of edge cases to convince myself this is really
-                    # a fact - it works for all the (admittedly
-                    # thorough) cases in our testsuite, but I would be
-                    # somewhat unsurprised to find a case in the wild
-                    # where this breaks down a bit. That said, I don't
-                    # know if it would hurt anything.
-                    for i in xrange(rev, 0, -1):
-                        if revlog.linkrev(i) == clrev:
-                            return i
-                    # We failed to resolve a parent for this node, so
-                    # we crash the changegroup construction.
-                    raise error.Abort(
-                        'unable to resolve parent while packing %r %r'
-                        ' for changeset %r' % (revlog.indexfile, rev, clrev))
-            return node.nullrev
-
-        if not linkparents or (
-            revlog.parentrevs(rev) == (node.nullrev, node.nullrev)):
-            p1, p2 = node.nullrev, node.nullrev
-        elif len(linkparents) == 1:
-            p1, = sorted(local(p) for p in linkparents)
-            p2 = node.nullrev
-        else:
-            p1, p2 = sorted(local(p) for p in linkparents)
-        n = revlog.node(rev)
-        yield ellipsisdata(
-            self, rev, revlog, p1, p2, revlog.revision(n), linknode)
-    extensions.wrapfunction(changegroup.cg1packer, 'revchunk', revchunk)
-
-    def deltaparent(orig, self, revlog, rev, p1, p2, prev):
-        if util.safehasattr(self, 'full_nodes'):
-            # TODO: send better deltas when in narrow mode.
-            #
-            # changegroup.group() loops over revisions to send,
-            # including revisions we'll skip. What this means is that
-            # `prev` will be a potentially useless delta base for all
-            # ellipsis nodes, as the client likely won't have it. In
-            # the future we should do bookkeeping about which nodes
-            # have been sent to the client, and try to be
-            # significantly smarter about delta bases. This is
-            # slightly tricky because this same code has to work for
-            # all revlogs, and we don't have the linkrev/linknode here.
-            return p1
-        return orig(self, revlog, rev, p1, p2, prev)
-    extensions.wrapfunction(changegroup.cg2packer, 'deltaparent', deltaparent)