copies: make calculating lazy for dir move detection's "addedfiles"
authorKyle Lippincott <spectral@google.com>
Fri, 11 Dec 2020 13:39:56 -0800
changeset 46109 2f357d053df2
parent 46108 bdc2bf68f19e
child 46110 d90f439ff19f
copies: make calculating lazy for dir move detection's "addedfiles" The information calculated here was only needed if (a) --debug was specified, or (b) a directory move was plausibly detected. With tree manifests (especially in my pathological repo and with our custom setup), pre-calculating the `u1` and `u2` can be quite slow, and it's not even necessary in many cases. Let's delay calculating it until we know it's actually necessary. This should have no observable differences in output. ### Performance I ran a rebase command in my pathological repo, rebasing two nodes across several public phase commits, but where no directory copies exist in any of the paths I'm tracking. #### Before ``` Time (mean ± σ): 3.711 s ± 0.061 s [User: 0.3 ms, System: 1.5 ms] Range (min … max): 3.640 s … 3.827 s 10 runs ``` #### After ``` Time (mean ± σ): 868.3 ms ± 10.1 ms [User: 0.5 ms, System: 1.2 ms] Range (min … max): 856.6 ms … 883.6 ms 10 runs ``` Differential Revision: https://phab.mercurial-scm.org/D9567
mercurial/copies.py
--- a/mercurial/copies.py	Tue Dec 08 16:45:13 2020 -0800
+++ b/mercurial/copies.py	Fri Dec 11 13:39:56 2020 -0800
@@ -896,18 +896,33 @@
             )
 
     # find interesting file sets from manifests
-    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
-    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
-    u1 = sorted(addedinm1 - addedinm2)
-    u2 = sorted(addedinm2 - addedinm1)
+    cache = []
+
+    def _get_addedfiles(idx):
+        if not cache:
+            addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
+            addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
+            u1 = sorted(addedinm1 - addedinm2)
+            u2 = sorted(addedinm2 - addedinm1)
+            cache.extend((u1, u2))
+        return cache[idx]
 
-    header = b"  unmatched files in %s"
-    if u1:
-        repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
-    if u2:
-        repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))
+    u1fn = lambda: _get_addedfiles(0)
+    u2fn = lambda: _get_addedfiles(1)
+    if repo.ui.debugflag:
+        u1 = u1fn()
+        u2 = u2fn()
 
-    if repo.ui.debugflag:
+        header = b"  unmatched files in %s"
+        if u1:
+            repo.ui.debug(
+                b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1))
+            )
+        if u2:
+            repo.ui.debug(
+                b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2))
+            )
+
         renamedeleteset = set()
         divergeset = set()
         for dsts in diverge.values():
@@ -941,8 +956,8 @@
 
     repo.ui.debug(b"  checking for directory renames\n")
 
-    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
-    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)
+    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
+    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)
 
     branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
     branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
@@ -950,14 +965,15 @@
     return branch_copies1, branch_copies2, diverge
 
 
-def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
+def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
     """Finds moved directories and files that should move with them.
 
     ctx: the context for one of the sides
     copy: files copied on the same side (as ctx)
     fullcopy: files copied on the same side (as ctx), including those that
               merge.manifestmerge() won't care about
-    addedfiles: added files on the other side (compared to ctx)
+    addedfilesfn: function returning added files on the other side (compared to
+                  ctx)
     """
     # generate a directory move map
     invalid = set()
@@ -997,7 +1013,7 @@
 
     movewithdir = {}
     # check unaccounted nonoverlapping files against directory moves
-    for f in addedfiles:
+    for f in addedfilesfn():
         if f not in fullcopy:
             for d in dirmove:
                 if f.startswith(d):