# HG changeset patch # User spectral # Date 1536967791 25200 # Node ID 24870f1be088c7df7df7dd0ae28711c71775e6e9 # Parent 0b7594ada0dbde476c5300af4b09f57fcb77d302 narrow: when writing treemanifests, skip inspecting directories outside narrow This provides significant speed benefits when narrow and treemanifests are in use, see the timing numbers below. Note that like previously, differences of <5% are considered noise. The below timing numbers are in the same style as previously (example: ee7ee0c516ca). 'before' is 9db85644, and does not include that example commit's improvements. diff --git: repo | N | T | before (mean +- stdev) | after (mean +- stdev) | % of before ------+---+---+------------------------+-----------------------+------------ m-u | | | 1.327 s +- 0.051 s | 1.296 s +- 0.009 s | 97.7% m-u | | x | 1.310 s +- 0.020 s | 1.295 s +- 0.015 s | 98.9% m-u | x | | 1.295 s +- 0.018 s | 1.296 s +- 0.007 s | 100.1% m-u | x | x | 83.5 ms +- 0.8 ms | 84.1 ms +- 0.8 ms | 100.7% l-d-r | | | 205.1 ms +- 3.5 ms | 205.0 ms +- 3.8 ms | 100.0% l-d-r | | x | 194.2 ms +- 5.6 ms | 192.3 ms +- 4.3 ms | 99.0% l-d-r | x | | 99.1 ms +- 2.2 ms | 97.8 ms +- 0.9 ms | 98.7% l-d-r | x | x | 66.2 ms +- 1.0 ms | 67.2 ms +- 2.7 ms | 101.5% diff -c . --git: repo | N | T | before (mean +- stdev) | after (mean +- stdev) | % of before ------+---+---+------------------------+-----------------------+------------ m-u | | | 233.9 ms +- 1.9 ms | 235.6 ms +- 5.1 ms | 100.7% m-u | | x | 151.4 ms +- 1.2 ms | 152.2 ms +- 2.0 ms | 100.5% m-u | x | | 234.8 ms +- 2.7 ms | 235.0 ms +- 2.7 ms | 100.1% m-u | x | x | 127.8 ms +- 2.1 ms | 126.0 ms +- 1.1 ms | 98.6% l-d-r | | | 82.5 ms +- 1.6 ms | 82.3 ms +- 2.0 ms | 99.8% l-d-r | | x | 3.742 s +- 0.017 s | 3.819 s +- 0.208 s | 102.1% l-d-r | x | | 84.4 ms +- 1.5 ms | 83.2 ms +- 1.0 ms | 98.6% l-d-r | x | x | 751.2 ms +- 5.0 ms | 755.8 ms +- 12.9 ms | 100.6% rebase -r . --keep -d .^^: repo | N | T | before (mean +- stdev) | after (mean +- stdev) | % of before ------+---+---+------------------------+-----------------------+------------ m-u | | | 5.519 s +- 0.038 s | 5.526 s +- 0.057 s | 100.1% m-u | | x | 5.588 s +- 0.048 s | 5.607 s +- 0.061 s | 100.3% m-u | x | | 5.520 s +- 0.044 s | 5.546 s +- 0.059 s | 100.5% m-u | x | x | 586.6 ms +- 12.8 ms | 554.9 ms +- 21.2 ms | 94.6% <-- l-d-r | | | 629.8 ms +- 5.5 ms | 627.4 ms +- 6.6 ms | 99.6% l-d-r | | x | 6.165 s +- 0.058 s | 6.255 s +- 0.303 s | 101.5% l-d-r | x | | 270.2 ms +- 2.3 ms | 271.4 ms +- 2.7 ms | 100.4% l-d-r | x | x | 4.700 s +- 0.025 s | 1.651 s +- 0.016 s | 35.1% <-- status --change . --copies: repo | N | T | before (mean +- stdev) | after (mean +- stdev) | % of before ------+---+---+------------------------+-----------------------+------------ m-u | | | 215.4 ms +- 2.3 ms | 216.5 ms +- 4.2 ms | 100.5% m-u | | x | 132.9 ms +- 1.2 ms | 132.0 ms +- 1.4 ms | 99.3% m-u | x | | 217.0 ms +- 1.9 ms | 215.4 ms +- 1.9 ms | 99.3% m-u | x | x | 108.6 ms +- 1.0 ms | 108.2 ms +- 1.5 ms | 99.6% l-d-r | | | 80.0 ms +- 1.3 ms | 80.5 ms +- 1.1 ms | 100.6% l-d-r | | x | 3.916 s +- 0.187 s | 3.966 s +- 0.236 s | 101.3% l-d-r | x | | 84.4 ms +- 3.1 ms | 83.9 ms +- 1.1 ms | 99.4% l-d-r | x | x | 758.0 ms +- 8.2 ms | 753.5 ms +- 5.0 ms | 99.4% status --copies: repo | N | T | before (mean +- stdev) | after (mean +- stdev) | % of before ------+---+---+------------------------+-----------------------+------------ m-u | | | 1.905 s +- 0.025 s | 1.910 s +- 0.044 s | 100.3% m-u | | x | 1.892 s +- 0.009 s | 1.895 s +- 0.012 s | 100.2% m-u | x | | 1.891 s +- 0.012 s | 1.902 s +- 0.018 s | 100.6% m-u | x | x | 93.3 ms +- 0.9 ms | 93.4 ms +- 0.8 ms | 100.1% l-d-r | | | 570.7 ms +- 7.8 ms | 571.9 ms +- 18.5 ms | 100.2% l-d-r | | x | 561.5 ms +- 5.2 ms | 562.9 ms +- 6.1 ms | 100.2% l-d-r | x | | 171.7 ms +- 2.6 ms | 171.9 ms +- 1.2 ms | 100.1% l-d-r | x | x | 142.7 ms +- 2.0 ms | 140.3 ms +- 1.0 ms | 98.3% update $rev^; ~/src/hg/hg{hg}/hg update $rev: repo | N | T | before (mean +- stdev) | after (mean +- stdev) | % of before ------+---+---+------------------------+-----------------------+------------ m-u | | | 3.126 s +- 0.016 s | 3.128 s +- 0.015 s | 100.1% m-u | | x | 3.014 s +- 0.068 s | 3.008 s +- 0.031 s | 99.8% m-u | x | | 3.143 s +- 0.037 s | 3.184 s +- 0.086 s | 101.3% m-u | x | x | 308.0 ms +- 1.8 ms | 308.1 ms +- 5.7 ms | 100.0% l-d-r | | | 430.8 ms +- 4.5 ms | 436.4 ms +- 8.7 ms | 101.3% l-d-r | | x | 9.676 s +- 0.127 s | 9.945 s +- 0.272 s | 102.8% l-d-r | x | | 254.2 ms +- 3.3 ms | 255.7 ms +- 3.1 ms | 100.6% l-d-r | x | x | 1.571 s +- 0.030 s | 1.555 s +- 0.014 s | 99.0% Differential Revision: https://phab.mercurial-scm.org/D4606 diff -r 0b7594ada0db -r 24870f1be088 mercurial/localrepo.py --- a/mercurial/localrepo.py Mon Sep 17 15:16:20 2018 -0400 +++ b/mercurial/localrepo.py Fri Sep 14 16:29:51 2018 -0700 @@ -2109,9 +2109,16 @@ 'changelog, but manifest differs)\n') if files or md: self.ui.note(_("committing manifest\n")) + # we're using narrowmatch here since it's already applied at + # other stages (such as dirstate.walk), so we're already + # ignoring things outside of narrowspec in most cases. The + # one case where we might have files outside the narrowspec + # at this point is merges, and we already error out in the + # case where the merge has files outside of the narrowspec, + # so this is safe. mn = mctx.write(trp, linkrev, p1.manifestnode(), p2.manifestnode(), - added, drop) + added, drop, match=self.narrowmatch()) else: self.ui.debug('reusing manifest form p1 (listed files ' 'actually unchanged)\n') diff -r 0b7594ada0db -r 24870f1be088 mercurial/manifest.py --- a/mercurial/manifest.py Mon Sep 17 15:16:20 2018 -0400 +++ b/mercurial/manifest.py Fri Sep 14 16:29:51 2018 -0700 @@ -1203,7 +1203,7 @@ s._dirty = False self._loadfunc = _load_for_read - def writesubtrees(self, m1, m2, writesubtree): + def writesubtrees(self, m1, m2, writesubtree, match): self._load() # for consistency; should never have any effect here m1._load() m2._load() @@ -1214,12 +1214,21 @@ return ld[1] return m._dirs.get(d, emptytree)._node + # we should have always loaded everything by the time we get here for + # `self`, but possibly not in `m1` or `m2`. + assert not self._lazydirs + # let's skip investigating things that `match` says we do not need. + visit = match.visitchildrenset(self._dir[:-1] or '.') + if visit == 'this' or visit == 'all': + visit = None for d, subm in self._dirs.iteritems(): + if visit and d[:-1] not in visit: + continue subp1 = getnode(m1, d) subp2 = getnode(m2, d) if subp1 == nullid: subp1, subp2 = subp2, subp1 - writesubtree(subm, subp1, subp2) + writesubtree(subm, subp1, subp2, match) def walksubtrees(self, matcher=None): """Returns an iterator of the subtrees of this manifest, including this @@ -1445,7 +1454,8 @@ self._dirlogcache[d] = mfrevlog return self._dirlogcache[d] - def add(self, m, transaction, link, p1, p2, added, removed, readtree=None): + def add(self, m, transaction, link, p1, p2, added, removed, readtree=None, + match=None): if p1 in self.fulltextcache and util.safehasattr(m, 'fastdelta'): # If our first parent is in the manifest cache, we can # compute a delta here using properties we know about the @@ -1469,9 +1479,11 @@ # process. if self._treeondisk: assert readtree, "readtree must be set for treemanifest writes" + assert match, "match must be specified for treemanifest writes" m1 = readtree(self.tree, p1) m2 = readtree(self.tree, p2) - n = self._addtree(m, transaction, link, m1, m2, readtree) + n = self._addtree(m, transaction, link, m1, m2, readtree, + match=match) arraytext = None else: text = m.text() @@ -1483,17 +1495,17 @@ return n - def _addtree(self, m, transaction, link, m1, m2, readtree): + def _addtree(self, m, transaction, link, m1, m2, readtree, match): # If the manifest is unchanged compared to one parent, # don't write a new revision if self.tree != '' and (m.unmodifiedsince(m1) or m.unmodifiedsince( m2)): return m.node() - def writesubtree(subm, subp1, subp2): + def writesubtree(subm, subp1, subp2, match): sublog = self.dirlog(subm.dir()) sublog.add(subm, transaction, link, subp1, subp2, None, None, - readtree=readtree) - m.writesubtrees(m1, m2, writesubtree) + readtree=readtree, match=match) + m.writesubtrees(m1, m2, writesubtree, match) text = m.dirtext() n = None if self.tree != '': @@ -1697,9 +1709,9 @@ def read(self): return self._manifestdict - def write(self, transaction, link, p1, p2, added, removed): + def write(self, transaction, link, p1, p2, added, removed, match=None): return self._storage().add(self._manifestdict, transaction, link, - p1, p2, added, removed) + p1, p2, added, removed, match=match) @interfaceutil.implementer(repository.imanifestrevisionstored) class manifestctx(object): @@ -1802,11 +1814,12 @@ def read(self): return self._treemanifest - def write(self, transaction, link, p1, p2, added, removed): + def write(self, transaction, link, p1, p2, added, removed, match=None): def readtree(dir, node): return self._manifestlog.get(dir, node).read() return self._storage().add(self._treemanifest, transaction, link, - p1, p2, added, removed, readtree=readtree) + p1, p2, added, removed, readtree=readtree, + match=match) @interfaceutil.implementer(repository.imanifestrevisionstored) class treemanifestctx(object): diff -r 0b7594ada0db -r 24870f1be088 mercurial/repository.py --- a/mercurial/repository.py Mon Sep 17 15:16:20 2018 -0400 +++ b/mercurial/repository.py Fri Sep 14 16:29:51 2018 -0700 @@ -978,13 +978,18 @@ class imanifestrevisionwritable(imanifestrevisionbase): """Interface representing a manifest revision that can be committed.""" - def write(transaction, linkrev, p1node, p2node, added, removed): + def write(transaction, linkrev, p1node, p2node, added, removed, match=None): """Add this revision to storage. Takes a transaction object, the changeset revision number it will be associated with, its parent nodes, and lists of added and removed paths. + If match is provided, storage can choose not to inspect or write out + items that do not match. Storage is still required to be able to provide + the full manifest in the future for any directories written (these + manifests should not be "narrowed on disk"). + Returns the binary node of the created revision. """ @@ -1141,7 +1146,8 @@ def dirlog(d): """Obtain a manifest storage instance for a tree.""" - def add(m, transaction, link, p1, p2, added, removed, readtree=None): + def add(m, transaction, link, p1, p2, added, removed, readtree=None, + match=None): """Add a revision to storage. ``m`` is an object conforming to ``imanifestdict``. @@ -1152,6 +1158,15 @@ ``added`` and ``removed`` are iterables of added and removed paths, respectively. + + ``readtree`` is a function that can be used to read the child tree(s) + when recursively writing the full tree structure when using + treemanifets. + + ``match`` is a matcher that can be used to hint to storage that not all + paths must be inspected; this is an optimization and can be safely + ignored. Note that the storage must still be able to reproduce a full + manifest including files that did not match. """ class imanifestlog(interfaceutil.Interface):