grep: reduce the cost of pathauditor checks when grepping working copy
author Valentin Gatien-Baron <valentin.gatienbaron@gmail.com>
Mon, 25 May 2020 17:39:23 -0400
changeset 44865 233ee525dcef
parent 44864 06105aa8bc0e
child 44866 5258bffdb1d6
grep: reduce the cost of pathauditor checks when grepping working copy Running `time hg grep zxczxczxczxczxc -l` on mozilla-central: before: real 0m20,000s user 0m15,796s sys 0m4,189s after: real 0m10,903s user 0m8,964s sys 0m1,916s if vfs didn't call pathauditor at all: real 0m7,781s user 0m5,968s sys 0m1,790s Differential Revision: https://phab.mercurial-scm.org/D8582
mercurial/commands.py
mercurial/pathutil.py
--- a/mercurial/commands.py	Mon May 25 17:32:25 2020 -0400
+++ b/mercurial/commands.py	Mon May 25 17:39:23 2020 -0400
@@ -3609,31 +3609,38 @@
             parent = pctx.rev()
             matches.setdefault(parent, {})
         files = revfiles.setdefault(rev, [])
-        for fn in fns:
-            # fn might not exist in the revision (could be a file removed by the
-            # revision). We could check `fn not in ctx` even when rev is None,
-            # but it's less racy to protect againt that in readfile.
-            if rev is not None and fn not in ctx:
-                continue
-
-            copy = None
-            if follow:
-                copy = getrenamed(fn, rev)
-                if copy:
-                    copies.setdefault(rev, {})[fn] = copy
-                    if fn in skip:
-                        skip.add(copy)
-            if fn in skip:
-                continue
-            files.append(fn)
-
-            if fn not in matches[rev]:
-                grepbody(fn, rev, readfile(ctx, fn))
-
-            if diff:
-                pfn = copy or fn
-                if pfn not in matches[parent] and pfn in pctx:
-                    grepbody(pfn, parent, readfile(pctx, pfn))
+        if rev is None:
+            # in `hg grep pattern`, 2/3 of the time is spent in
+            # pathauditor checks without this in mozilla-central
+            contextmanager = repo.wvfs.audit.cached
+        else:
+            contextmanager = util.nullcontextmanager
+        with contextmanager():
+            for fn in fns:
+                # fn might not exist in the revision (could be a file removed by
+                # the revision). We could check `fn not in ctx` even when rev is
+                # None, but it's less racy to protect against that in readfile.
+                if rev is not None and fn not in ctx:
+                    continue
+
+                copy = None
+                if follow:
+                    copy = getrenamed(fn, rev)
+                    if copy:
+                        copies.setdefault(rev, {})[fn] = copy
+                        if fn in skip:
+                            skip.add(copy)
+                if fn in skip:
+                    continue
+                files.append(fn)
+
+                if fn not in matches[rev]:
+                    grepbody(fn, rev, readfile(ctx, fn))
+
+                if diff:
+                    pfn = copy or fn
+                    if pfn not in matches[parent] and pfn in pctx:
+                        grepbody(pfn, parent, readfile(pctx, pfn))
 
     ui.pager(b'grep')
     fm = ui.formatter(b'grep', opts)
--- a/mercurial/pathutil.py	Mon May 25 17:32:25 2020 -0400
+++ b/mercurial/pathutil.py	Mon May 25 17:39:23 2020 -0400
@@ -1,5 +1,6 @@
 from __future__ import absolute_import
 
+import contextlib
 import errno
 import os
 import posixpath
@@ -148,6 +149,19 @@
         except (OSError, error.Abort):
             return False
 
+    @contextlib.contextmanager
+    def cached(self):  # context manager: self._cached is true in the with-body
+        if self._cached:  # flag already set: leave it and the collections alone
+            yield
+        else:
+            try:
+                self._cached = True
+                yield
+            finally:  # cleanup also runs when the with-body raises
+                self.audited.clear()  # empty both audit collections on exit
+                self.auditeddir.clear()
+                self._cached = False  # restore the flag to its value on entry
+
 
 def canonpath(root, cwd, myname, auditor=None):
     '''return the canonical path of myname, given cwd and root