mercurial: changeset 28386:1c658391b22f

parsers: optimize filtered headrevs logic

The old native head revs logic would iterate over every node, starting from 0, and check whether each node was filtered (by testing it against the filteredrevs Python set). On large repos with hundreds of thousands of commits, this could take 150ms. The new logic iterates over the nodes in reverse order and skips the filtered check for any node for which we have already seen an unfiltered child. This avoids the vast majority of the filteredrevs set checks, which shaves the time down from 150ms to 20ms during every branch cache write.

--- a/mercurial/parsers.c	Mon Mar 07 03:14:19 2016 +0900
+++ b/mercurial/parsers.c	Tue Mar 08 00:20:08 2016 -0800
@@ -1446,20 +1446,26 @@
 		goto bail;
 	}
 
-	for (i = 0; i < len; i++) {
+	for (i = len - 1; i >= 0; i--) {
 		int isfiltered;
 		int parents[2];
 
-		isfiltered = check_filter(filter, i);
-		if (isfiltered == -1) {
-			PyErr_SetString(PyExc_TypeError,
-				"unable to check filter");
-			goto bail;
-		}
-
-		if (isfiltered) {
-			nothead[i] = 1;
-			continue;
+		/* If nothead[i] == 1, it means we've seen an unfiltered child of this
+		 * node already, and therefore this node is not filtered. So we can skip
+		 * the expensive check_filter step.
+		 */
+		if (nothead[i] != 1) {
+			isfiltered = check_filter(filter, i);
+			if (isfiltered == -1) {
+				PyErr_SetString(PyExc_TypeError,
+					"unable to check filter");
+				goto bail;
+			}
+
+			if (isfiltered) {
+				nothead[i] = 1;
+				continue;
+			}
 		}
 
 		if (index_get_parents(self, i, parents, (int)len - 1) < 0)

author	Durham Goode <durham@fb.com>
	Tue, 08 Mar 2016 00:20:08 -0800
changeset 28386	1c658391b22f
parent 28385	3f9e25a42e69
child 28391	73905484ef70