reachableroots: use internal "revstates" array to test if rev is a root
authorYuya Nishihara <yuya@tcha.org>
Fri, 14 Aug 2015 15:43:29 +0900
changeset 26053 b68c9d232db6
parent 26052 b970418bbafe
child 26054 5049e10fed14
reachableroots: use internal "revstates" array to test if rev is a root The main goal of this patch series is to reduce the use of PyXxx() function that is likely to require ugly error handling and inc/decref. Plus, this is faster than using PySet_Contains(). revset #0: 0::tip 0) 0.004168 1) 0.003678 88% This patch ignores out-of-range roots as they are in the pure implementation. Because reachable sets are calculated from heads, and out-of-range heads raise IndexError, we can just take out-of-range roots as unreachable. Otherwise, the test of "hg log -Gr '. + wdir()'" would fail. "heads" argument is changed to a list. Should we have to rename the C function as its signature is changed?
mercurial/changelog.py
mercurial/parsers.c
mercurial/revset.py
tests/test-parseindex.t
--- a/mercurial/changelog.py	Tue Aug 18 16:40:10 2015 -0400
+++ b/mercurial/changelog.py	Fri Aug 14 15:43:29 2015 +0900
@@ -187,7 +187,7 @@
 
     def reachableroots(self, minroot, heads, roots, includepath=False):
         return revset.baseset(sorted(
-            self.index.reachableroots(minroot, heads, roots, includepath)))
+            self.index.reachableroots2(minroot, heads, roots, includepath)))
 
     def headrevs(self):
         if self.filteredrevs:
--- a/mercurial/parsers.c	Tue Aug 18 16:40:10 2015 -0400
+++ b/mercurial/parsers.c	Fri Aug 14 15:43:29 2015 +0900
@@ -1108,16 +1108,15 @@
 		phases[i] = phases[parent_2];
 }
 
-static PyObject *reachableroots(indexObject *self, PyObject *args)
+static PyObject *reachableroots2(indexObject *self, PyObject *args)
 {
 
 	/* Input */
 	long minroot;
 	PyObject *includepatharg = NULL;
 	int includepath = 0;
-	/* heads is a list */
+	/* heads and roots are lists */
 	PyObject *heads = NULL;
-	/* roots is a set */
 	PyObject *roots = NULL;
 	PyObject *reachable = NULL;
 
@@ -1136,12 +1135,13 @@
 	 * revstates: array of length len+1 (all revs + nullrev) */
 	int *tovisit = NULL;
 	long lentovisit = 0;
-	enum { RS_SEEN = 1 };
+	enum { RS_SEEN = 1, RS_ROOT = 2 };
 	char *revstates = NULL;
 
 	/* Get arguments */
 	if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
-				&PySet_Type, &roots, &PyBool_Type, &includepatharg))
+			      &PyList_Type, &roots,
+			      &PyBool_Type, &includepatharg))
 		goto bail;
 
 	if (includepatharg == Py_True)
@@ -1167,6 +1167,18 @@
 		goto bail;
 	}
 
+	l = PyList_GET_SIZE(roots);
+	for (i = 0; i < l; i++) {
+		revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
+		if (revnum == -1 && PyErr_Occurred())
+			goto bail;
+		/* If root is out of range, e.g. wdir(), it must be unreachable
+		 * from heads. So we can just ignore it. */
+		if (revnum + 1 < 0 || revnum + 1 >= len + 1)
+			continue;
+		revstates[revnum + 1] |= RS_ROOT;
+	}
+
 	/* Populate tovisit with all the heads */
 	l = PyList_GET_SIZE(heads);
 	for (i = 0; i < l; i++) {
@@ -1188,17 +1200,15 @@
 	while (k < lentovisit) {
 		/* Add the node to reachable if it is a root*/
 		revnum = tovisit[k++];
-		val = PyInt_FromLong(revnum);
-		if (val == NULL)
-			goto bail;
-		if (PySet_Contains(roots, val) == 1) {
+		if (revstates[revnum + 1] & RS_ROOT) {
+			val = PyInt_FromLong(revnum);
+			if (val == NULL)
+				goto bail;
 			PySet_Add(reachable, val);
-			if (includepath == 0) {
-				Py_DECREF(val);
+			Py_DECREF(val);
+			if (includepath == 0)
 				continue;
-			}
 		}
-		Py_DECREF(val);
 
 		/* Add its parents to the list of nodes to visit */
 		if (revnum == -1)
@@ -2434,7 +2444,7 @@
 	 "get an index entry"},
 	{"computephasesmapsets", (PyCFunction)compute_phases_map_sets,
 			METH_VARARGS, "compute phases"},
-	{"reachableroots", (PyCFunction)reachableroots, METH_VARARGS,
+	{"reachableroots2", (PyCFunction)reachableroots2, METH_VARARGS,
 		"reachableroots"},
 	{"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
 	 "get head revisions"}, /* Can do filtering since 3.2 */
--- a/mercurial/revset.py	Tue Aug 18 16:40:10 2015 -0400
+++ b/mercurial/revset.py	Fri Aug 14 15:43:29 2015 +0900
@@ -94,6 +94,7 @@
     if not roots:
         return baseset()
     parentrevs = repo.changelog.parentrevs
+    roots = set(roots)
     visit = list(heads)
     reachable = set()
     seen = {}
@@ -133,7 +134,7 @@
     # XXX this should be 'parentset.min()' assuming 'parentset' is a smartset
     # (and if it is not, it should.)
     minroot = min(roots)
-    roots = set(roots)
+    roots = list(roots)
     heads = list(heads)
     try:
         return repo.changelog.reachableroots(minroot, heads, roots, includepath)
--- a/tests/test-parseindex.t	Tue Aug 18 16:40:10 2015 -0400
+++ b/tests/test-parseindex.t	Fri Aug 14 15:43:29 2015 +0900
@@ -69,28 +69,53 @@
   $ python <<EOF
   > from mercurial import changelog, scmutil
   > cl = changelog.changelog(scmutil.vfs('.hg/store'))
-  > print 'goods:'
+  > print 'good heads:'
   > for head in [0, len(cl) - 1, -1]:
-  >     print'%s: %r' % (head, cl.reachableroots(0, [head], set([0])))
-  > print 'bads:'
+  >     print'%s: %r' % (head, cl.reachableroots(0, [head], [0]))
+  > print 'bad heads:'
   > for head in [len(cl), 10000, -2, -10000, None]:
   >     print '%s:' % head,
   >     try:
-  >         cl.reachableroots(0, [head], set([0]))
+  >         cl.reachableroots(0, [head], [0])
   >         print 'uncaught buffer overflow?'
   >     except (IndexError, TypeError) as inst:
   >         print inst
+  > print 'good roots:'
+  > for root in [0, len(cl) - 1, -1]:
+  >     print '%s: %r' % (root, cl.reachableroots(root, [len(cl) - 1], [root]))
+  > print 'out-of-range roots are ignored:'
+  > for root in [len(cl), 10000, -2, -10000]:
+  >     print '%s: %r' % (root, cl.reachableroots(root, [len(cl) - 1], [root]))
+  > print 'bad roots:'
+  > for root in [None]:
+  >     print '%s:' % root,
+  >     try:
+  >         cl.reachableroots(root, [len(cl) - 1], [root])
+  >         print 'uncaught error?'
+  >     except TypeError as inst:
+  >         print inst
   > EOF
-  goods:
+  good heads:
   0: <baseset [0]>
   1: <baseset [0]>
   -1: <baseset []>
-  bads:
+  bad heads:
   2: head out of range
   10000: head out of range
   -2: head out of range
   -10000: head out of range
   None: an integer is required
+  good roots:
+  0: <baseset [0]>
+  1: <baseset [1]>
+  -1: <baseset [-1]>
+  out-of-range roots are ignored:
+  2: <baseset []>
+  10000: <baseset []>
+  -2: <baseset []>
+  -10000: <baseset []>
+  bad roots:
+  None: an integer is required
 
   $ cd ..
 
@@ -127,7 +152,7 @@
   > n0, n1 = cl.node(0), cl.node(1)
   > ops = [
   >     ('reachableroots',
-  >      lambda: cl.index.reachableroots(0, [1], set([0]), False)),
+  >      lambda: cl.index.reachableroots2(0, [1], [0], False)),
   >     ('compute_phases_map_sets', lambda: cl.computephases([[0], []])),
   >     ('index_headrevs', lambda: cl.headrevs()),
   >     ('find_gca_candidates', lambda: cl.commonancestorsheads(n0, n1)),