dirstate: track updated files to improve write time
authorDurham Goode <durham@fb.com>
Sun, 05 Mar 2017 16:20:07 -0800
changeset 31206 49e5491ed9bd
parent 31205 a48c6ac5c13a
child 31207 1ef37b16b8e8
dirstate: track updated files to improve write time Previously, dirstate.write() would iterate over the entire dirstate to find any entries that needed to be marked 'lookup' (i.e. if they have the same timestamp as now). This was O(working copy) and slow in large repos. It was most visible when rebasing or histediting multiple commits, since it gets executed once per commit, even if the entire rebase/histedit is wrapped in a transaction. The fix is to track which files have been editted, and only check those to see if they need to be marked as 'lookup'. This saves 25% on histedit times in very large repositories. I tested this by adding temporary debug logic to verify that the old files processed in the loop matched the new files processed in the loop and running the test suite.
mercurial/dirstate.py
--- a/mercurial/dirstate.py	Mon Mar 06 03:09:15 2017 -0800
+++ b/mercurial/dirstate.py	Sun Mar 05 16:20:07 2017 -0800
@@ -89,6 +89,7 @@
         self._pendingfilename = '%s.pending' % self._filename
         self._plchangecallbacks = {}
         self._origpl = None
+        self._updatedfiles = set()
 
         # for consistent view between _pl() and _read() invocations
         self._pendingmode = None
@@ -431,6 +432,7 @@
                 delattr(self, a)
         self._lastnormaltime = 0
         self._dirty = False
+        self._updatedfiles.clear()
         self._parentwriters = 0
         self._origpl = None
 
@@ -441,8 +443,11 @@
         self._dirty = True
         if source is not None:
             self._copymap[dest] = source
+            self._updatedfiles.add(source)
+            self._updatedfiles.add(dest)
         elif dest in self._copymap:
             del self._copymap[dest]
+            self._updatedfiles.add(dest)
 
     def copied(self, file):
         return self._copymap.get(file, None)
@@ -459,6 +464,8 @@
             if normed in self._filefoldmap:
                 del self._filefoldmap[normed]
 
+        self._updatedfiles.add(f)
+
     def _addpath(self, f, state, mode, size, mtime):
         oldstate = self[f]
         if state == 'a' or oldstate == 'r':
@@ -475,6 +482,7 @@
         if oldstate in "?r" and "_dirs" in self.__dict__:
             self._dirs.addpath(f)
         self._dirty = True
+        self._updatedfiles.add(f)
         self._map[f] = dirstatetuple(state, mode, size, mtime)
         if state != 'n' or mtime == -1:
             self._nonnormalset.add(f)
@@ -656,6 +664,7 @@
         self._copymap = {}
         self._pl = [nullid, nullid]
         self._lastnormaltime = 0
+        self._updatedfiles.clear()
         self._dirty = True
 
     def rebuild(self, parent, allfiles, changedfiles=None):
@@ -692,13 +701,15 @@
             # emulate dropping timestamp in 'parsers.pack_dirstate'
             now = _getfsnow(self._opener)
             dmap = self._map
-            for f, e in dmap.iteritems():
-                if e[0] == 'n' and e[3] == now:
+            for f in self._updatedfiles:
+                e = dmap.get(f)
+                if e is not None and e[0] == 'n' and e[3] == now:
                     dmap[f] = dirstatetuple(e[0], e[1], e[2], -1)
                     self._nonnormalset.add(f)
 
             # emulate that all 'dirstate.normal' results are written out
             self._lastnormaltime = 0
+            self._updatedfiles.clear()
 
             # delay writing in-memory changes out
             tr.addfilegenerator('dirstate', (self._filename,),