# HG changeset patch # User Martin von Zweigbergk # Date 1539415325 25200 # Node ID 19ed212de2d102731daffee4185eb6a5295bb8f1 # Parent 81e4f039a0cd854b23b3c7ff3abbd567e7584751 match: optimize matcher when all patterns are of rootfilesin kind Internally at Google, we use narrowspecs with only rootfilesin-kind patterns. Sometimes there are thousands of such patterns (i.e. thousands of tracked directories). In such cases, it can take quite long to build and evaluate the resulting matcher. This patch optimizes matchers that have only patterns of rootfilesin kind so that, instead of creating a regular expression, it matches the given file's directory against the set of directories. In a repo with ~3600 tracked directories, it takes about 1.35 s to build the matcher and 2.7 s to walk the dirstate before this patch. After, it takes 0.04 s to create the matcher and 0.87 s to walk the dirstate. It may be worthwhile to do similar optimizations for e.g. patterns of type "kind:", but that's not a priority for us right now. Differential Revision: https://phab.mercurial-scm.org/D5058 diff -r 81e4f039a0cd -r 19ed212de2d1 mercurial/match.py --- a/mercurial/match.py Sat Oct 13 06:02:27 2018 -0400 +++ b/mercurial/match.py Sat Oct 13 00:22:05 2018 -0700 @@ -1164,8 +1164,20 @@ regex = '' if kindpats: - regex, mf = _buildregexmatch(kindpats, globsuffix) - matchfuncs.append(mf) + if all(k == 'rootfilesin' for k, p, s in kindpats): + dirs = {p for k, p, s in kindpats} + def mf(f): + i = f.rfind('/') + if i >= 0: + dir = f[:i] + else: + dir = '.' 
+ return dir in dirs + regex = b'rootfilesin: %s' % sorted(dirs) + matchfuncs.append(mf) + else: + regex, mf = _buildregexmatch(kindpats, globsuffix) + matchfuncs.append(mf) if len(matchfuncs) == 1: return regex, matchfuncs[0] diff -r 81e4f039a0cd -r 19ed212de2d1 tests/test-walk.t --- a/tests/test-walk.t Sat Oct 13 06:02:27 2018 -0400 +++ b/tests/test-walk.t Sat Oct 13 00:22:05 2018 -0700 @@ -143,25 +143,25 @@ $ hg debugwalk -v 'rootfilesin:' * matcher: - + f fennel ../fennel f fenugreek ../fenugreek f fiddlehead ../fiddlehead $ hg debugwalk -v -I 'rootfilesin:' * matcher: - + f fennel ../fennel f fenugreek ../fenugreek f fiddlehead ../fiddlehead $ hg debugwalk -v 'rootfilesin:.' * matcher: - + f fennel ../fennel f fenugreek ../fenugreek f fiddlehead ../fiddlehead $ hg debugwalk -v -I 'rootfilesin:.' * matcher: - + f fennel ../fennel f fenugreek ../fenugreek f fiddlehead ../fiddlehead @@ -169,7 +169,7 @@ * matcher: , - m2=> + m2=> f beans/black ../beans/black f beans/borlotti ../beans/borlotti f beans/kidney ../beans/kidney @@ -182,19 +182,19 @@ f mammals/skunk skunk $ hg debugwalk -v 'rootfilesin:fennel' * matcher: - + $ hg debugwalk -v -I 'rootfilesin:fennel' * matcher: - + $ hg debugwalk -v 'rootfilesin:skunk' * matcher: - + $ hg debugwalk -v -I 'rootfilesin:skunk' * matcher: - + $ hg debugwalk -v 'rootfilesin:beans' * matcher: - + f beans/black ../beans/black f beans/borlotti ../beans/borlotti f beans/kidney ../beans/kidney @@ -203,7 +203,7 @@ f beans/turtle ../beans/turtle $ hg debugwalk -v -I 'rootfilesin:beans' * matcher: - + f beans/black ../beans/black f beans/borlotti ../beans/borlotti f beans/kidney ../beans/kidney @@ -212,25 +212,25 @@ f beans/turtle ../beans/turtle $ hg debugwalk -v 'rootfilesin:mammals' * matcher: - + f mammals/skunk skunk $ hg debugwalk -v -I 'rootfilesin:mammals' * matcher: - + f mammals/skunk skunk $ hg debugwalk -v 'rootfilesin:mammals/' * matcher: - + f mammals/skunk skunk $ hg debugwalk -v -I 'rootfilesin:mammals/' * matcher: - 
+ f mammals/skunk skunk $ hg debugwalk -v -X 'rootfilesin:mammals' * matcher: , - m2=> + m2=> f beans/black ../beans/black f beans/borlotti ../beans/borlotti f beans/kidney ../beans/kidney