narrow: when writing treemanifests, skip inspecting directories outside narrow
authorspectral <spectral@google.com>
Fri, 14 Sep 2018 16:29:51 -0700
changeset 39668 24870f1be088
parent 39667 0b7594ada0db
child 39669 3f11cb1aeb90
narrow: when writing treemanifests, skip inspecting directories outside narrow This provides significant speed benefits when narrow and treemanifests are in use, see the timing numbers below. Note that like previously, differences of <5% are considered noise. The below timing numbers are in the same style as previously (example: ee7ee0c516ca). 'before' is 9db85644, and does not include that example commit's improvements. diff --git: repo | N | T | before (mean +- stdev) | after (mean +- stdev) | % of before ------+---+---+------------------------+-----------------------+------------ m-u | | | 1.327 s +- 0.051 s | 1.296 s +- 0.009 s | 97.7% m-u | | x | 1.310 s +- 0.020 s | 1.295 s +- 0.015 s | 98.9% m-u | x | | 1.295 s +- 0.018 s | 1.296 s +- 0.007 s | 100.1% m-u | x | x | 83.5 ms +- 0.8 ms | 84.1 ms +- 0.8 ms | 100.7% l-d-r | | | 205.1 ms +- 3.5 ms | 205.0 ms +- 3.8 ms | 100.0% l-d-r | | x | 194.2 ms +- 5.6 ms | 192.3 ms +- 4.3 ms | 99.0% l-d-r | x | | 99.1 ms +- 2.2 ms | 97.8 ms +- 0.9 ms | 98.7% l-d-r | x | x | 66.2 ms +- 1.0 ms | 67.2 ms +- 2.7 ms | 101.5% diff -c . --git: repo | N | T | before (mean +- stdev) | after (mean +- stdev) | % of before ------+---+---+------------------------+-----------------------+------------ m-u | | | 233.9 ms +- 1.9 ms | 235.6 ms +- 5.1 ms | 100.7% m-u | | x | 151.4 ms +- 1.2 ms | 152.2 ms +- 2.0 ms | 100.5% m-u | x | | 234.8 ms +- 2.7 ms | 235.0 ms +- 2.7 ms | 100.1% m-u | x | x | 127.8 ms +- 2.1 ms | 126.0 ms +- 1.1 ms | 98.6% l-d-r | | | 82.5 ms +- 1.6 ms | 82.3 ms +- 2.0 ms | 99.8% l-d-r | | x | 3.742 s +- 0.017 s | 3.819 s +- 0.208 s | 102.1% l-d-r | x | | 84.4 ms +- 1.5 ms | 83.2 ms +- 1.0 ms | 98.6% l-d-r | x | x | 751.2 ms +- 5.0 ms | 755.8 ms +- 12.9 ms | 100.6% rebase -r . --keep -d .^^: repo | N | T | before (mean +- stdev) | after (mean +- stdev) | % of before ------+---+---+------------------------+-----------------------+------------ m-u | | | 5.519 s +- 0.038 s | 5.526 s +- 0.057 s | 100.1% m-u | | x | 5.588 s +- 0.048 s | 5.607 s +- 0.061 s | 100.3% m-u | x | | 5.520 s +- 0.044 s | 5.546 s +- 0.059 s | 100.5% m-u | x | x | 586.6 ms +- 12.8 ms | 554.9 ms +- 21.2 ms | 94.6% <-- l-d-r | | | 629.8 ms +- 5.5 ms | 627.4 ms +- 6.6 ms | 99.6% l-d-r | | x | 6.165 s +- 0.058 s | 6.255 s +- 0.303 s | 101.5% l-d-r | x | | 270.2 ms +- 2.3 ms | 271.4 ms +- 2.7 ms | 100.4% l-d-r | x | x | 4.700 s +- 0.025 s | 1.651 s +- 0.016 s | 35.1% <-- status --change . --copies: repo | N | T | before (mean +- stdev) | after (mean +- stdev) | % of before ------+---+---+------------------------+-----------------------+------------ m-u | | | 215.4 ms +- 2.3 ms | 216.5 ms +- 4.2 ms | 100.5% m-u | | x | 132.9 ms +- 1.2 ms | 132.0 ms +- 1.4 ms | 99.3% m-u | x | | 217.0 ms +- 1.9 ms | 215.4 ms +- 1.9 ms | 99.3% m-u | x | x | 108.6 ms +- 1.0 ms | 108.2 ms +- 1.5 ms | 99.6% l-d-r | | | 80.0 ms +- 1.3 ms | 80.5 ms +- 1.1 ms | 100.6% l-d-r | | x | 3.916 s +- 0.187 s | 3.966 s +- 0.236 s | 101.3% l-d-r | x | | 84.4 ms +- 3.1 ms | 83.9 ms +- 1.1 ms | 99.4% l-d-r | x | x | 758.0 ms +- 8.2 ms | 753.5 ms +- 5.0 ms | 99.4% status --copies: repo | N | T | before (mean +- stdev) | after (mean +- stdev) | % of before ------+---+---+------------------------+-----------------------+------------ m-u | | | 1.905 s +- 0.025 s | 1.910 s +- 0.044 s | 100.3% m-u | | x | 1.892 s +- 0.009 s | 1.895 s +- 0.012 s | 100.2% m-u | x | | 1.891 s +- 0.012 s | 1.902 s +- 0.018 s | 100.6% m-u | x | x | 93.3 ms +- 0.9 ms | 93.4 ms +- 0.8 ms | 100.1% l-d-r | | | 570.7 ms +- 7.8 ms | 571.9 ms +- 18.5 ms | 100.2% l-d-r | | x | 561.5 ms +- 5.2 ms | 562.9 ms +- 6.1 ms | 100.2% l-d-r | x | | 171.7 ms +- 2.6 ms | 171.9 ms +- 1.2 ms | 100.1% l-d-r | x | x | 142.7 ms +- 2.0 ms | 140.3 ms +- 1.0 ms | 98.3% update $rev^; ~/src/hg/hg{hg}/hg update $rev: repo | N | T | before (mean +- stdev) | after (mean +- stdev) | % of before ------+---+---+------------------------+-----------------------+------------ m-u | | | 3.126 s +- 0.016 s | 3.128 s +- 0.015 s | 100.1% m-u | | x | 3.014 s +- 0.068 s | 3.008 s +- 0.031 s | 99.8% m-u | x | | 3.143 s +- 0.037 s | 3.184 s +- 0.086 s | 101.3% m-u | x | x | 308.0 ms +- 1.8 ms | 308.1 ms +- 5.7 ms | 100.0% l-d-r | | | 430.8 ms +- 4.5 ms | 436.4 ms +- 8.7 ms | 101.3% l-d-r | | x | 9.676 s +- 0.127 s | 9.945 s +- 0.272 s | 102.8% l-d-r | x | | 254.2 ms +- 3.3 ms | 255.7 ms +- 3.1 ms | 100.6% l-d-r | x | x | 1.571 s +- 0.030 s | 1.555 s +- 0.014 s | 99.0% Differential Revision: https://phab.mercurial-scm.org/D4606
mercurial/localrepo.py
mercurial/manifest.py
mercurial/repository.py
--- a/mercurial/localrepo.py	Mon Sep 17 15:16:20 2018 -0400
+++ b/mercurial/localrepo.py	Fri Sep 14 16:29:51 2018 -0700
@@ -2109,9 +2109,16 @@
                                   'changelog, but manifest differs)\n')
                 if files or md:
                     self.ui.note(_("committing manifest\n"))
+                    # we're using narrowmatch here since it's already applied at
+                    # other stages (such as dirstate.walk), so we're already
+                    # ignoring things outside of narrowspec in most cases. The
+                    # one case where we might have files outside the narrowspec
+                    # at this point is merges, and we already error out in the
+                    # case where the merge has files outside of the narrowspec,
+                    # so this is safe.
                     mn = mctx.write(trp, linkrev,
                                     p1.manifestnode(), p2.manifestnode(),
-                                    added, drop)
+                                    added, drop, match=self.narrowmatch())
                 else:
                     self.ui.debug('reusing manifest form p1 (listed files '
                                   'actually unchanged)\n')
--- a/mercurial/manifest.py	Mon Sep 17 15:16:20 2018 -0400
+++ b/mercurial/manifest.py	Fri Sep 14 16:29:51 2018 -0700
@@ -1203,7 +1203,7 @@
             s._dirty = False
         self._loadfunc = _load_for_read
 
-    def writesubtrees(self, m1, m2, writesubtree):
+    def writesubtrees(self, m1, m2, writesubtree, match):
         self._load() # for consistency; should never have any effect here
         m1._load()
         m2._load()
@@ -1214,12 +1214,21 @@
                 return ld[1]
             return m._dirs.get(d, emptytree)._node
 
+        # we should have always loaded everything by the time we get here for
+        # `self`, but possibly not in `m1` or `m2`.
+        assert not self._lazydirs
+        # let's skip investigating things that `match` says we do not need.
+        visit = match.visitchildrenset(self._dir[:-1] or '.')
+        if visit == 'this' or visit == 'all':
+            visit = None
         for d, subm in self._dirs.iteritems():
+            if visit and d[:-1] not in visit:
+                continue
             subp1 = getnode(m1, d)
             subp2 = getnode(m2, d)
             if subp1 == nullid:
                 subp1, subp2 = subp2, subp1
-            writesubtree(subm, subp1, subp2)
+            writesubtree(subm, subp1, subp2, match)
 
     def walksubtrees(self, matcher=None):
         """Returns an iterator of the subtrees of this manifest, including this
@@ -1445,7 +1454,8 @@
             self._dirlogcache[d] = mfrevlog
         return self._dirlogcache[d]
 
-    def add(self, m, transaction, link, p1, p2, added, removed, readtree=None):
+    def add(self, m, transaction, link, p1, p2, added, removed, readtree=None,
+            match=None):
         if p1 in self.fulltextcache and util.safehasattr(m, 'fastdelta'):
             # If our first parent is in the manifest cache, we can
             # compute a delta here using properties we know about the
@@ -1469,9 +1479,11 @@
             # process.
             if self._treeondisk:
                 assert readtree, "readtree must be set for treemanifest writes"
+                assert match, "match must be specified for treemanifest writes"
                 m1 = readtree(self.tree, p1)
                 m2 = readtree(self.tree, p2)
-                n = self._addtree(m, transaction, link, m1, m2, readtree)
+                n = self._addtree(m, transaction, link, m1, m2, readtree,
+                                  match=match)
                 arraytext = None
             else:
                 text = m.text()
@@ -1483,17 +1495,17 @@
 
         return n
 
-    def _addtree(self, m, transaction, link, m1, m2, readtree):
+    def _addtree(self, m, transaction, link, m1, m2, readtree, match):
         # If the manifest is unchanged compared to one parent,
         # don't write a new revision
         if self.tree != '' and (m.unmodifiedsince(m1) or m.unmodifiedsince(
             m2)):
             return m.node()
-        def writesubtree(subm, subp1, subp2):
+        def writesubtree(subm, subp1, subp2, match):
             sublog = self.dirlog(subm.dir())
             sublog.add(subm, transaction, link, subp1, subp2, None, None,
-                       readtree=readtree)
-        m.writesubtrees(m1, m2, writesubtree)
+                       readtree=readtree, match=match)
+        m.writesubtrees(m1, m2, writesubtree, match)
         text = m.dirtext()
         n = None
         if self.tree != '':
@@ -1697,9 +1709,9 @@
     def read(self):
         return self._manifestdict
 
-    def write(self, transaction, link, p1, p2, added, removed):
+    def write(self, transaction, link, p1, p2, added, removed, match=None):
         return self._storage().add(self._manifestdict, transaction, link,
-                                   p1, p2, added, removed)
+                                   p1, p2, added, removed, match=match)
 
 @interfaceutil.implementer(repository.imanifestrevisionstored)
 class manifestctx(object):
@@ -1802,11 +1814,12 @@
     def read(self):
         return self._treemanifest
 
-    def write(self, transaction, link, p1, p2, added, removed):
+    def write(self, transaction, link, p1, p2, added, removed, match=None):
         def readtree(dir, node):
             return self._manifestlog.get(dir, node).read()
         return self._storage().add(self._treemanifest, transaction, link,
-                                   p1, p2, added, removed, readtree=readtree)
+                                   p1, p2, added, removed, readtree=readtree,
+                                   match=match)
 
 @interfaceutil.implementer(repository.imanifestrevisionstored)
 class treemanifestctx(object):
--- a/mercurial/repository.py	Mon Sep 17 15:16:20 2018 -0400
+++ b/mercurial/repository.py	Fri Sep 14 16:29:51 2018 -0700
@@ -978,13 +978,18 @@
 class imanifestrevisionwritable(imanifestrevisionbase):
     """Interface representing a manifest revision that can be committed."""
 
-    def write(transaction, linkrev, p1node, p2node, added, removed):
+    def write(transaction, linkrev, p1node, p2node, added, removed, match=None):
         """Add this revision to storage.
 
         Takes a transaction object, the changeset revision number it will
         be associated with, its parent nodes, and lists of added and
         removed paths.
 
+        If match is provided, storage can choose not to inspect or write out
+        items that do not match. Storage is still required to be able to provide
+        the full manifest in the future for any directories written (these
+        manifests should not be "narrowed on disk").
+
         Returns the binary node of the created revision.
         """
 
@@ -1141,7 +1146,8 @@
     def dirlog(d):
         """Obtain a manifest storage instance for a tree."""
 
-    def add(m, transaction, link, p1, p2, added, removed, readtree=None):
+    def add(m, transaction, link, p1, p2, added, removed, readtree=None,
+            match=None):
         """Add a revision to storage.
 
         ``m`` is an object conforming to ``imanifestdict``.
@@ -1152,6 +1158,15 @@
 
         ``added`` and ``removed`` are iterables of added and removed paths,
         respectively.
+
+        ``readtree`` is a function that can be used to read the child tree(s)
+        when recursively writing the full tree structure when using
+        treemanifets.
+
+        ``match`` is a matcher that can be used to hint to storage that not all
+        paths must be inspected; this is an optimization and can be safely
+        ignored. Note that the storage must still be able to reproduce a full
+        manifest including files that did not match.
         """
 
 class imanifestlog(interfaceutil.Interface):