log: make file log slow path usable on large repos
author Durham Goode <durham@fb.com>
Tue, 10 Sep 2013 19:49:34 -0700
changeset 19730 d184bae667e4
parent 19729 dfefb719eb92
child 19731 436a3f728375
log: make file log slow path usable on large repos

Running "hg log <pattern or directory>" on large repos took a very, very long time because it first read ctx.files() for every commit before even starting to process the results. This change makes the ctx.files() check lazy, which makes the command start producing results immediately.
mercurial/cmdutil.py
--- a/mercurial/cmdutil.py	Fri Sep 13 15:40:04 2013 -0500
+++ b/mercurial/cmdutil.py	Tue Sep 10 19:49:34 2013 -0700
@@ -1172,12 +1172,34 @@
                                'filenames'))
 
         # The slow path checks files modified in every changeset.
-        for i in sorted(revs):
-            ctx = change(i)
-            matches = filter(match, ctx.files())
-            if matches:
-                fncache[i] = matches
-                wanted.add(i)
+        # This is really slow on large repos, so compute the set lazily.
+        class lazywantedset(object):
+            def __init__(self):
+                self.set = set()
+                self.revs = set(revs)
+
+            # No need to worry about locality here because it will be accessed
+            # in the same order as the increasing window below.
+            def __contains__(self, value):
+                if value in self.set:
+                    return True
+                elif not value in self.revs:
+                    return False
+                else:
+                    self.revs.discard(value)
+                    ctx = change(value)
+                    matches = filter(match, ctx.files())
+                    if matches:
+                        fncache[value] = matches
+                        self.set.add(value)
+                        return True
+                    return False
+
+            def discard(self, value):
+                self.revs.discard(value)
+                self.set.discard(value)
+
+        wanted = lazywantedset()
 
     class followfilter(object):
         def __init__(self, onlyfirst=False):