narrow: when widening, don't include manifests the client already has
author Martin von Zweigbergk <martinvonz@google.com>
Fri, 05 Oct 2018 11:07:34 -0700
changeset 40344 2c5835b4246b
parent 40343 a69d5823af6d
child 40345 d30a19d10441
narrow: when widening, don't include manifests the client already has

When widening, we already don't include the changelog (since f1844a10ee19) and files that the client already has (since c73c7653dfb9). However, we still include all manifests needed for the new narrowspec. When using flat manifests, that means we resend all the manifests even though the client necessarily has all of them. For tree manifests, we unnecessarily resend the root manifests and any subdirectory manifests that the client already has.

This patch makes it so we no longer resend manifests that the client already has. It does so by passing an extra matcher to the changegroup packer and it uses that for filtering out directories matching the old matcher's visitdir().

For consistency between directories and files, it also makes the filtering of files look at both old and new matcher rather than passing in a diff matcher as we did before.

Differential Revision: https://phab.mercurial-scm.org/D4895
hgext/narrow/narrowbundle2.py
hgext/narrow/narrowwirepeer.py
mercurial/bundle2.py
mercurial/changegroup.py
mercurial/exchange.py
--- a/hgext/narrow/narrowbundle2.py	Wed Oct 17 09:30:07 2018 -0700
+++ b/hgext/narrow/narrowbundle2.py	Fri Oct 05 11:07:34 2018 -0700
@@ -117,7 +117,7 @@
             repo, set(), common, known, newmatch)
         if newvisit:
             packer = changegroup.getbundler(version, repo,
-                                            filematcher=newmatch,
+                                            matcher=newmatch,
                                             ellipses=True,
                                             shallow=depth is not None,
                                             ellipsisroots=newellipsis,
@@ -135,7 +135,7 @@
     repo.ui.debug('Found %d relevant revs\n' % len(relevant_nodes))
     if visitnodes:
         packer = changegroup.getbundler(version, repo,
-                                        filematcher=newmatch,
+                                        matcher=newmatch,
                                         ellipses=True,
                                         shallow=depth is not None,
                                         ellipsisroots=ellipsisroots,
--- a/hgext/narrow/narrowwirepeer.py	Wed Oct 17 09:30:07 2018 -0700
+++ b/hgext/narrow/narrowwirepeer.py	Fri Oct 05 11:07:34 2018 -0700
@@ -12,7 +12,6 @@
     error,
     extensions,
     hg,
-    match as matchmod,
     narrowspec,
     pycompat,
     wireprototypes,
@@ -82,9 +81,8 @@
                                     exclude=newexcludes)
         oldmatch = narrowspec.match(repo.root, include=oldincludes,
                                     exclude=oldexcludes)
-        diffmatch = matchmod.differencematcher(newmatch, oldmatch)
 
-        bundler = bundle2.widen_bundle(repo, diffmatch, common, known,
+        bundler = bundle2.widen_bundle(repo, oldmatch, newmatch, common, known,
                                              cgversion, ellipses)
     except error.Abort as exc:
         bundler = bundle2.bundle20(repo.ui)
--- a/mercurial/bundle2.py	Wed Oct 17 09:30:07 2018 -0700
+++ b/mercurial/bundle2.py	Fri Oct 05 11:07:34 2018 -0700
@@ -2278,12 +2278,13 @@
     streamclone.applybundlev2(repo, part, filecount, bytecount,
                               requirements)
 
-def widen_bundle(repo, diffmatcher, common, known, cgversion, ellipses):
+def widen_bundle(repo, oldmatcher, newmatcher, common, known, cgversion,
+                 ellipses):
     """generates bundle2 for widening a narrow clone
 
     repo is the localrepository instance
-    diffmatcher is a differencemacther of '(newincludes, newexcludes) -
-    (oldincludes, oldexcludes)'
+    oldmatcher matches what the client already has
+    newmatcher matches what the client needs (including what it already has)
     common is set of common heads between server and client
     known is a set of revs known on the client side (used in ellipses)
     cgversion is the changegroup version to send
@@ -2300,7 +2301,8 @@
         # XXX: we should only send the filelogs (and treemanifest). user
         # already has the changelog and manifest
         packer = changegroup.getbundler(cgversion, repo,
-                                        filematcher=diffmatcher,
+                                        oldmatcher=oldmatcher,
+                                        matcher=newmatcher,
                                         fullnodes=commonnodes)
         cgdata = packer.generate(set([nodemod.nullid]), list(commonnodes),
                                  False, 'narrow_widen', changelog=False)
--- a/mercurial/changegroup.py	Wed Oct 17 09:30:07 2018 -0700
+++ b/mercurial/changegroup.py	Fri Oct 05 11:07:34 2018 -0700
@@ -727,14 +727,17 @@
         progress.complete()
 
 class cgpacker(object):
-    def __init__(self, repo, filematcher, version,
+    def __init__(self, repo, oldmatcher, matcher, version,
                  builddeltaheader, manifestsend,
                  forcedeltaparentprev=False,
                  bundlecaps=None, ellipses=False,
                  shallow=False, ellipsisroots=None, fullnodes=None):
         """Given a source repo, construct a bundler.
 
-        filematcher is a matcher that matches on files to include in the
+        oldmatcher is a matcher that matches on files the client already has.
+        These will not be included in the changegroup.
+
+        matcher is a matcher that matches on files to include in the
         changegroup. Used to facilitate sparse changegroups.
 
         forcedeltaparentprev indicates whether delta parents must be against
@@ -761,8 +764,10 @@
         ellipsis because for very large histories we expect this to be
         significantly smaller.
         """
-        assert filematcher
-        self._filematcher = filematcher
+        assert oldmatcher
+        assert matcher
+        self._oldmatcher = oldmatcher
+        self._matcher = matcher
 
         self.version = version
         self._forcedeltaparentprev = forcedeltaparentprev
@@ -1027,7 +1032,7 @@
             tree, nodes = tmfnodes.popitem()
             store = mfl.getstorage(tree)
 
-            if not self._filematcher.visitdir(store.tree[:-1] or '.'):
+            if not self._matcher.visitdir(store.tree[:-1] or '.'):
                 # No nodes to send because this directory is out of
                 # the client's view of the repository (probably
                 # because of narrow clones).
@@ -1051,7 +1056,16 @@
                 fullclnodes=self._fullclnodes,
                 precomputedellipsis=self._precomputedellipsis)
 
-            yield tree, deltas
+            if not self._oldmatcher.visitdir(store.tree[:-1] or '.'):
+                yield tree, deltas
+            else:
+                # 'deltas' is a generator and we need to consume it even if
+                # we are not going to send it because a side-effect is that
+                # it updates tmfnodes (via lookupfn)
+                for d in deltas:
+                    pass
+                if not tree:
+                    yield tree, []
 
     def _prunemanifests(self, store, nodes, commonrevs):
         # This is split out as a separate method to allow filtering
@@ -1066,7 +1080,8 @@
     # The 'source' parameter is useful for extensions
     def generatefiles(self, changedfiles, commonrevs, source,
                       mfdicts, fastpathlinkrev, fnodes, clrevs):
-        changedfiles = list(filter(self._filematcher, changedfiles))
+        changedfiles = [f for f in changedfiles
+                        if self._matcher(f) and not self._oldmatcher(f)]
 
         if not fastpathlinkrev:
             def normallinknodes(unused, fname):
@@ -1151,12 +1166,13 @@
 
         progress.complete()
 
-def _makecg1packer(repo, filematcher, bundlecaps, ellipses=False,
-                   shallow=False, ellipsisroots=None, fullnodes=None):
+def _makecg1packer(repo, oldmatcher, matcher, bundlecaps,
+                   ellipses=False, shallow=False, ellipsisroots=None,
+                   fullnodes=None):
     builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
         d.node, d.p1node, d.p2node, d.linknode)
 
-    return cgpacker(repo, filematcher, b'01',
+    return cgpacker(repo, oldmatcher, matcher, b'01',
                     builddeltaheader=builddeltaheader,
                     manifestsend=b'',
                     forcedeltaparentprev=True,
@@ -1166,12 +1182,13 @@
                     ellipsisroots=ellipsisroots,
                     fullnodes=fullnodes)
 
-def _makecg2packer(repo, filematcher, bundlecaps, ellipses=False,
-                   shallow=False, ellipsisroots=None, fullnodes=None):
+def _makecg2packer(repo, oldmatcher, matcher, bundlecaps,
+                   ellipses=False, shallow=False, ellipsisroots=None,
+                   fullnodes=None):
     builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
         d.node, d.p1node, d.p2node, d.basenode, d.linknode)
 
-    return cgpacker(repo, filematcher, b'02',
+    return cgpacker(repo, oldmatcher, matcher, b'02',
                     builddeltaheader=builddeltaheader,
                     manifestsend=b'',
                     bundlecaps=bundlecaps,
@@ -1180,12 +1197,13 @@
                     ellipsisroots=ellipsisroots,
                     fullnodes=fullnodes)
 
-def _makecg3packer(repo, filematcher, bundlecaps, ellipses=False,
-                   shallow=False, ellipsisroots=None, fullnodes=None):
+def _makecg3packer(repo, oldmatcher, matcher, bundlecaps,
+                   ellipses=False, shallow=False, ellipsisroots=None,
+                   fullnodes=None):
     builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
         d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)
 
-    return cgpacker(repo, filematcher, b'03',
+    return cgpacker(repo, oldmatcher, matcher, b'03',
                     builddeltaheader=builddeltaheader,
                     manifestsend=closechunk(),
                     bundlecaps=bundlecaps,
@@ -1252,15 +1270,17 @@
     assert versions
     return min(versions)
 
-def getbundler(version, repo, bundlecaps=None, filematcher=None,
-               ellipses=False, shallow=False, ellipsisroots=None,
-               fullnodes=None):
+def getbundler(version, repo, bundlecaps=None, oldmatcher=None,
+               matcher=None, ellipses=False, shallow=False,
+               ellipsisroots=None, fullnodes=None):
     assert version in supportedoutgoingversions(repo)
 
-    if filematcher is None:
-        filematcher = matchmod.alwaysmatcher(repo.root, '')
+    if matcher is None:
+        matcher = matchmod.alwaysmatcher(repo.root, '')
+    if oldmatcher is None:
+        oldmatcher = matchmod.nevermatcher(repo.root, '')
 
-    if version == '01' and not filematcher.always():
+    if version == '01' and not matcher.always():
         raise error.ProgrammingError('version 01 changegroups do not support '
                                      'sparse file matchers')
 
@@ -1271,10 +1291,10 @@
 
     # Requested files could include files not in the local store. So
     # filter those out.
-    filematcher = repo.narrowmatch(filematcher)
+    matcher = repo.narrowmatch(matcher)
 
     fn = _packermap[version][0]
-    return fn(repo, filematcher, bundlecaps, ellipses=ellipses,
+    return fn(repo, oldmatcher, matcher, bundlecaps, ellipses=ellipses,
               shallow=shallow, ellipsisroots=ellipsisroots,
               fullnodes=fullnodes)
 
@@ -1297,9 +1317,9 @@
                         {'clcount': len(outgoing.missing) })
 
 def makestream(repo, outgoing, version, source, fastpath=False,
-               bundlecaps=None, filematcher=None):
+               bundlecaps=None, matcher=None):
     bundler = getbundler(version, repo, bundlecaps=bundlecaps,
-                         filematcher=filematcher)
+                         matcher=matcher)
 
     repo = repo.unfiltered()
     commonrevs = outgoing.common
--- a/mercurial/exchange.py	Wed Oct 17 09:30:07 2018 -0700
+++ b/mercurial/exchange.py	Fri Oct 05 11:07:34 2018 -0700
@@ -2153,14 +2153,12 @@
     if kwargs.get(r'narrow', False):
         include = sorted(filter(bool, kwargs.get(r'includepats', [])))
         exclude = sorted(filter(bool, kwargs.get(r'excludepats', [])))
-        filematcher = narrowspec.match(repo.root, include=include,
-                                       exclude=exclude)
+        matcher = narrowspec.match(repo.root, include=include, exclude=exclude)
     else:
-        filematcher = None
+        matcher = None
 
     cgstream = changegroup.makestream(repo, outgoing, version, source,
-                                      bundlecaps=bundlecaps,
-                                      filematcher=filematcher)
+                                      bundlecaps=bundlecaps, matcher=matcher)
 
     part = bundler.newpart('changegroup', data=cgstream)
     if cgversions: