dirstate: track otherparent files same as nonnormal
author Durham Goode <durham@fb.com>
Wed, 08 Mar 2017 17:35:20 -0800
changeset 31278 1c97a91a18dc
parent 31276 cd29673cebdb
child 31279 052bc876a879
dirstate: track otherparent files same as nonnormal

Calling dirstate.setparents() is expensive in a large repo because it iterates over every file in the dirstate. It does so to undo any merge state or otherparent state files. Merge state files are already covered by dirstate._nonnormalset, so we just need to track otherparent files in a similar manner to avoid the full iteration here.

Fixing this shaves 20-25% off histedit in large repos.

I tested this by adding temporary debug logic to verify that the old files processed in the loop matched the new files processed in the loop and running the test suite.
mercurial/dirstate.py
mercurial/parsers.c
--- a/mercurial/dirstate.py	Mon Mar 06 09:28:33 2017 +0100
+++ b/mercurial/dirstate.py	Wed Mar 08 17:35:20 2017 -0800
@@ -55,10 +55,16 @@
 def nonnormalentries(dmap):
     '''Compute the nonnormal dirstate entries from the dmap'''
     try:
-        return parsers.nonnormalentries(dmap)
+        return parsers.nonnormalotherparententries(dmap)
     except AttributeError:
-        return set(fname for fname, e in dmap.iteritems()
-                   if e[0] != 'n' or e[3] == -1)
+        nonnorm = set()
+        otherparent = set()
+        for fname, e in dmap.iteritems():
+            if e[0] != 'n' or e[3] == -1:
+                nonnorm.add(fname)
+            if e[0] == 'n' and e[2] == -2:
+                otherparent.add(fname)
+        return nonnorm, otherparent
 
 class dirstate(object):
 
@@ -131,7 +137,15 @@
 
     @propertycache
     def _nonnormalset(self):
-        return nonnormalentries(self._map)
+        nonnorm, otherparents = nonnormalentries(self._map)
+        self._otherparentset = otherparents
+        return nonnorm
+
+    @propertycache
+    def _otherparentset(self):
+        nonnorm, otherparents = nonnormalentries(self._map)
+        self._nonnormalset = nonnorm
+        return otherparents
 
     @propertycache
     def _filefoldmap(self):
@@ -341,7 +355,12 @@
         self._pl = p1, p2
         copies = {}
         if oldp2 != nullid and p2 == nullid:
-            for f, s in self._map.iteritems():
+            candidatefiles = self._nonnormalset.union(self._otherparentset)
+            for f in candidatefiles:
+                s = self._map.get(f)
+                if s is None:
+                    continue
+
                 # Discard 'm' markers when moving away from a merge state
                 if s[0] == 'm':
                     if f in self._copymap:
@@ -427,7 +446,8 @@
 
     def invalidate(self):
         for a in ("_map", "_copymap", "_filefoldmap", "_dirfoldmap", "_branch",
-                  "_pl", "_dirs", "_ignore", "_nonnormalset"):
+                  "_pl", "_dirs", "_ignore", "_nonnormalset",
+                  "_otherparentset"):
             if a in self.__dict__:
                 delattr(self, a)
         self._lastnormaltime = 0
@@ -486,6 +506,8 @@
         self._map[f] = dirstatetuple(state, mode, size, mtime)
         if state != 'n' or mtime == -1:
             self._nonnormalset.add(f)
+        if size == -2:
+            self._otherparentset.add(f)
 
     def normal(self, f):
         '''Mark a file normal and clean.'''
@@ -560,6 +582,7 @@
                 size = -1
             elif entry[0] == 'n' and entry[2] == -2: # other parent
                 size = -2
+                self._otherparentset.add(f)
         self._map[f] = dirstatetuple('r', 0, size, 0)
         self._nonnormalset.add(f)
         if size == 0 and f in self._copymap:
@@ -659,6 +682,7 @@
     def clear(self):
         self._map = {}
         self._nonnormalset = set()
+        self._otherparentset = set()
         if "_dirs" in self.__dict__:
             delattr(self, "_dirs")
         self._copymap = {}
@@ -758,7 +782,7 @@
                     break
 
         st.write(parsers.pack_dirstate(self._map, self._copymap, self._pl, now))
-        self._nonnormalset = nonnormalentries(self._map)
+        self._nonnormalset, self._otherparentset = nonnormalentries(self._map)
         st.close()
         self._lastnormaltime = 0
         self._dirty = self._dirtypl = False
--- a/mercurial/parsers.c	Mon Mar 06 09:28:33 2017 +0100
+++ b/mercurial/parsers.c	Wed Mar 08 17:35:20 2017 -0800
@@ -560,11 +560,11 @@
 }
 
 /*
- * Build a set of non-normal entries from the dirstate dmap
+ * Build a set of non-normal and other parent entries from the dirstate dmap
 */
-static PyObject *nonnormalentries(PyObject *self, PyObject *args)
-{
-	PyObject *dmap, *nonnset = NULL, *fname, *v;
+static PyObject *nonnormalotherparententries(PyObject *self, PyObject *args) {
+	PyObject *dmap, *fname, *v;
+	PyObject *nonnset = NULL, *otherpset = NULL, *result = NULL;
 	Py_ssize_t pos;
 
 	if (!PyArg_ParseTuple(args, "O!:nonnormalentries",
@@ -575,6 +575,10 @@
 	if (nonnset == NULL)
 		goto bail;
 
+	otherpset = PySet_New(NULL);
+	if (otherpset == NULL)
+		goto bail;
+
 	pos = 0;
 	while (PyDict_Next(dmap, &pos, &fname, &v)) {
 		dirstateTupleObject *t;
@@ -585,19 +589,53 @@
 		}
 		t = (dirstateTupleObject *)v;
 
+		if (t->state == 'n' && t->size == -2) {
+			if (PySet_Add(otherpset, fname) == -1) {
+				goto bail;
+			}
+		}
+
 		if (t->state == 'n' && t->mtime != -1)
 			continue;
 		if (PySet_Add(nonnset, fname) == -1)
 			goto bail;
 	}
 
-	return nonnset;
+	result = Py_BuildValue("(OO)", nonnset, otherpset);
+	if (result == NULL)
+		goto bail;
+	return result;
 bail:
 	Py_XDECREF(nonnset);
+	Py_XDECREF(otherpset);
+	Py_XDECREF(result);
 	return NULL;
 }
 
 /*
+ * Build a set of non-normal entries from the dirstate dmap
+*/
+static PyObject *nonnormalentries(PyObject *self, PyObject *args)
+{
+	PyObject *nonnset = NULL, *combined = NULL;
+
+	combined = nonnormalotherparententries(self, args);
+	if (!combined) {
+		return NULL;
+	}
+
+	nonnset = PyTuple_GetItem(combined, 0);
+	if (!nonnset) {
+		Py_DECREF(combined);
+		return NULL;
+	}
+
+	Py_INCREF(nonnset);
+	Py_DECREF(combined);
+	return nonnset;
+}
+
+/*
  * Efficiently pack a dirstate object into its on-disk format.
  */
 static PyObject *pack_dirstate(PyObject *self, PyObject *args)
@@ -2816,6 +2854,9 @@
 	{"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
 	{"nonnormalentries", nonnormalentries, METH_VARARGS,
 	"create a set containing non-normal entries of given dirstate\n"},
+	{"nonnormalotherparententries", nonnormalotherparententries, METH_VARARGS,
+	"create a set containing non-normal and other parent entries of given "
+	"dirstate\n"},
 	{"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
 	{"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
 	{"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},