phases: sparsify phaseroots and phasesets
authorJoerg Sonnenberger <joerg@bec.de>
Wed, 08 Jul 2020 00:36:36 +0200
changeset 45131 61e7464477ac
parent 45130 33524b6bef53
child 45132 9d532329ee97
phases: sparsify phaseroots and phasesets As final step of dealing with the holes in the phase numbers, make phaseroots and phasesets both dictionaries indexed by the phase number. Further adjust the interface of the C module by pushing the node to revision mapping down as it is cheaper on the C side to deal with revision numbers. Overall, the patch series improves a no-change "hg up" for my NetBSD test repository from 4.7s to 1.3s. Differential Revision: https://phab.mercurial-scm.org/D8698
mercurial/cext/parsers.c
mercurial/cext/revlog.c
mercurial/phases.py
mercurial/policy.py
tests/test-parseindex.t
--- a/mercurial/cext/parsers.c	Tue Jul 07 14:01:12 2020 +0530
+++ b/mercurial/cext/parsers.c	Wed Jul 08 00:36:36 2020 +0200
@@ -667,7 +667,7 @@
 void manifest_module_init(PyObject *mod);
 void revlog_module_init(PyObject *mod);
 
-static const int version = 16;
+static const int version = 17;
 
 static void module_init(PyObject *mod)
 {
--- a/mercurial/cext/revlog.c	Tue Jul 07 14:01:12 2020 +0530
+++ b/mercurial/cext/revlog.c	Wed Jul 08 00:36:36 2020 +0200
@@ -109,6 +109,9 @@
 
 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
 
+static int index_find_node(indexObject *self, const char *node,
+                           Py_ssize_t nodelen);
+
 #if LONG_MAX == 0x7fffffffL
 static const char *const tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
 #else
@@ -577,34 +580,6 @@
 	}
 }
 
-static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list,
-                                    Py_ssize_t marker, char *phases)
-{
-	PyObject *iter = NULL;
-	PyObject *iter_item = NULL;
-	Py_ssize_t min_idx = index_length(self) + 2;
-	long iter_item_long;
-
-	if (PyList_GET_SIZE(list) != 0) {
-		iter = PyObject_GetIter(list);
-		if (iter == NULL)
-			return -2;
-		while ((iter_item = PyIter_Next(iter))) {
-			if (!pylong_to_long(iter_item, &iter_item_long)) {
-				Py_DECREF(iter_item);
-				return -2;
-			}
-			Py_DECREF(iter_item);
-			if (iter_item_long < min_idx)
-				min_idx = iter_item_long;
-			phases[iter_item_long] = (char)marker;
-		}
-		Py_DECREF(iter);
-	}
-
-	return min_idx;
-}
-
 static inline void set_phase_from_parents(char *phases, int parent_1,
                                           int parent_2, Py_ssize_t i)
 {
@@ -773,99 +748,165 @@
 	return NULL;
 }
 
+static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases,
+                             char phase)
+{
+	Py_ssize_t len = index_length(self);
+	PyObject *iter;
+	PyObject *item;
+	PyObject *iterator;
+	int rev, minrev = -1;
+	char *node;
+
+	if (!PySet_Check(roots))
+		return -2;
+	iterator = PyObject_GetIter(roots);
+	if (iterator == NULL)
+		return -2;
+	while ((item = PyIter_Next(iterator))) {
+		if (node_check(item, &node) == -1)
+			goto failed;
+		rev = index_find_node(self, node, 20);
+		/* null is implicitly public, so negative is invalid */
+		if (rev < 0 || rev >= len)
+			goto failed;
+		phases[rev] = phase;
+		if (minrev == -1 || minrev > rev)
+			minrev = rev;
+		Py_DECREF(item);
+	}
+	Py_DECREF(iterator);
+	return minrev;
+failed:
+	Py_DECREF(iterator);
+	Py_DECREF(item);
+	return -2;
+}
+
 static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args)
 {
-	PyObject *roots = Py_None;
+	/* 0: public (untracked), 1: draft, 2: secret, 32: archive,
+	   96: internal */
+	static const char trackedphases[] = {1, 2, 32, 96};
 	PyObject *ret = NULL;
-	PyObject *phasessize = NULL;
+	PyObject *roots = Py_None;
+	PyObject *idx = NULL;
+	PyObject *pyphase = NULL;
+	PyObject *pyrev = NULL;
 	PyObject *phaseroots = NULL;
-	PyObject *phaseset = NULL;
-	PyObject *phasessetlist = NULL;
-	PyObject *rev = NULL;
+	PyObject *phasessize = NULL;
+	PyObject *phasesets[4] = {NULL, NULL, NULL, NULL};
 	Py_ssize_t len = index_length(self);
-	Py_ssize_t numphase = 0;
-	Py_ssize_t minrevallphases = 0;
-	Py_ssize_t minrevphase = 0;
-	Py_ssize_t i = 0;
+	const char *currentphase;
 	char *phases = NULL;
-	long phase;
+	int minphaserev = -1, rev, i;
+	const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0]));
 
 	if (!PyArg_ParseTuple(args, "O", &roots))
-		goto done;
-	if (roots == NULL || !PyList_Check(roots)) {
-		PyErr_SetString(PyExc_TypeError, "roots must be a list");
-		goto done;
+		return NULL;
+	if (roots == NULL || !PyDict_Check(roots)) {
+		PyErr_SetString(PyExc_TypeError, "roots must be a dictionary");
+		return NULL;
 	}
 
-	phases = calloc(
-	    len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */
+	phases = calloc(len, 1);
 	if (phases == NULL) {
 		PyErr_NoMemory();
-		goto done;
+		return NULL;
 	}
-	/* Put the phase information of all the roots in phases */
-	numphase = PyList_GET_SIZE(roots) + 1;
-	minrevallphases = len + 1;
-	phasessetlist = PyList_New(numphase);
-	if (phasessetlist == NULL)
-		goto done;
+
+	for (i = 0; i < numphases; ++i) {
+		pyphase = PyInt_FromLong(trackedphases[i]);
+		if (pyphase == NULL)
+			goto release;
+		phaseroots = PyDict_GetItem(roots, pyphase);
+		Py_DECREF(pyphase);
+		if (phaseroots == NULL)
+			continue;
+		rev = add_roots_get_min(self, phaseroots, phases, trackedphases[i]);
+		phaseroots = NULL;
+		if (rev == -2)
+			goto release;
+		if (rev != -1 && (minphaserev == -1 || rev < minphaserev))
+			minphaserev = rev;
+	}
+
+	for (i = 0; i < numphases; ++i) {
+		phasesets[i] = PySet_New(NULL);
+		if (phasesets[i] == NULL)
+			goto release;
+	}
 
-	PyList_SET_ITEM(phasessetlist, 0, Py_None);
-	Py_INCREF(Py_None);
-
-	for (i = 0; i < numphase - 1; i++) {
-		phaseroots = PyList_GET_ITEM(roots, i);
-		phaseset = PySet_New(NULL);
-		if (phaseset == NULL)
+	if (minphaserev == -1)
+		minphaserev = len;
+	for (rev = minphaserev; rev < len; ++rev) {
+		int parents[2];
+		/*
+		 * The parent lookup could be skipped for phaseroots, but
+		 * phase --force would historically not recompute them
+		 * correctly, leaving descendents with a lower phase around.
+		 * As such, unconditionally recompute the phase.
+		 */
+		if (index_get_parents(self, rev, parents, (int)len - 1) < 0)
 			goto release;
-		PyList_SET_ITEM(phasessetlist, i + 1, phaseset);
-		if (!PyList_Check(phaseroots)) {
-			PyErr_SetString(PyExc_TypeError,
-			                "roots item must be a list");
+		set_phase_from_parents(phases, parents[0], parents[1], rev);
+		switch (phases[rev]) {
+		case 0:
+			continue;
+		case 1:
+			pyphase = phasesets[0];
+			break;
+		case 2:
+			pyphase = phasesets[1];
+			break;
+		case 32:
+			pyphase = phasesets[2];
+			break;
+		case 96:
+			pyphase = phasesets[3];
+			break;
+		default:
 			goto release;
 		}
-		minrevphase =
-		    add_roots_get_min(self, phaseroots, i + 1, phases);
-		if (minrevphase == -2) /* Error from add_roots_get_min */
+		pyrev = PyInt_FromLong(rev);
+		if (pyrev == NULL)
 			goto release;
-		minrevallphases = MIN(minrevallphases, minrevphase);
+		if (PySet_Add(pyphase, pyrev) == -1) {
+			Py_DECREF(pyrev);
+			goto release;
+		}
+		Py_DECREF(pyrev);
 	}
-	/* Propagate the phase information from the roots to the revs */
-	if (minrevallphases != -1) {
-		int parents[2];
-		for (i = minrevallphases; i < len; i++) {
-			if (index_get_parents(self, i, parents, (int)len - 1) <
-			    0)
-				goto release;
-			set_phase_from_parents(phases, parents[0], parents[1],
-			                       i);
+	phaseroots = _dict_new_presized(numphases);
+	if (phaseroots == NULL)
+		goto release;
+	for (int i = 0; i < numphases; ++i) {
+		pyphase = PyInt_FromLong(trackedphases[i]);
+		if (pyphase == NULL)
+			goto release;
+		if (PyDict_SetItem(phaseroots, pyphase, phasesets[i]) == -1) {
+			Py_DECREF(pyphase);
+			goto release;
 		}
+		Py_DECREF(phasesets[i]);
+		phasesets[i] = NULL;
 	}
-	/* Transform phase list to a python list */
 	phasessize = PyInt_FromSsize_t(len);
 	if (phasessize == NULL)
 		goto release;
-	for (i = 0; i < len; i++) {
-		phase = phases[i];
-		/* We only store the sets of phase for non public phase, the
-		 * public phase is computed as a difference */
-		if (phase != 0) {
-			phaseset = PyList_GET_ITEM(phasessetlist, phase);
-			rev = PyInt_FromSsize_t(i);
-			if (rev == NULL)
-				goto release;
-			PySet_Add(phaseset, rev);
-			Py_XDECREF(rev);
-		}
-	}
-	ret = PyTuple_Pack(2, phasessize, phasessetlist);
+
+	ret = PyTuple_Pack(2, phasessize, phaseroots);
+	Py_DECREF(phasessize);
+	Py_DECREF(phaseroots);
+	return ret;
 
 release:
-	Py_XDECREF(phasessize);
-	Py_XDECREF(phasessetlist);
-done:
+	for (i = 0; i < numphases; ++i)
+		Py_XDECREF(phasesets[i]);
+	Py_XDECREF(phaseroots);
+
 	free(phases);
-	return ret;
+	return NULL;
 }
 
 static PyObject *index_headrevs(indexObject *self, PyObject *args)
--- a/mercurial/phases.py	Tue Jul 07 14:01:12 2020 +0530
+++ b/mercurial/phases.py	Wed Jul 08 00:36:36 2020 +0200
@@ -170,7 +170,7 @@
     """
     repo = repo.unfiltered()
     dirty = False
-    roots = [set() for i in range(max(allphases) + 1)]
+    roots = {i: set() for i in allphases}
     try:
         f, pending = txnutil.trypending(repo.root, repo.svfs, b'phaseroots')
         try:
@@ -333,7 +333,11 @@
         if len(cl) >= self._loadedrevslen:
             self.invalidate()
             self.loadphaserevs(repo)
-        return any(self.phaseroots[1:])
+        return any(
+            revs
+            for phase, revs in pycompat.iteritems(self.phaseroots)
+            if phase != public
+        )
 
     def nonpublicphaseroots(self, repo):
         """returns the roots of all non-public phases
@@ -346,7 +350,13 @@
         if len(cl) >= self._loadedrevslen:
             self.invalidate()
             self.loadphaserevs(repo)
-        return set().union(*[roots for roots in self.phaseroots[1:] if roots])
+        return set().union(
+            *[
+                revs
+                for phase, revs in pycompat.iteritems(self.phaseroots)
+                if phase != public
+            ]
+        )
 
     def getrevset(self, repo, phases, subset=None):
         """return a smartset for the given phases"""
@@ -405,7 +415,7 @@
         # Shallow copy meant to ensure isolation in
         # advance/retractboundary(), nothing more.
         ph = self.__class__(None, None, _load=False)
-        ph.phaseroots = self.phaseroots[:]
+        ph.phaseroots = self.phaseroots.copy()
         ph.dirty = self.dirty
         ph.opener = self.opener
         ph._loadedrevslen = self._loadedrevslen
@@ -425,21 +435,12 @@
 
     def _getphaserevsnative(self, repo):
         repo = repo.unfiltered()
-        nativeroots = []
-        for phase in trackedphases:
-            nativeroots.append(
-                pycompat.maplist(repo.changelog.rev, self.phaseroots[phase])
-            )
-        revslen, phasesets = repo.changelog.computephases(nativeroots)
-        phasesets2 = [set() for phase in range(max(allphases) + 1)]
-        for phase, phaseset in zip(allphases, phasesets):
-            phasesets2[phase] = phaseset
-        return revslen, phasesets2
+        return repo.changelog.computephases(self.phaseroots)
 
     def _computephaserevspure(self, repo):
         repo = repo.unfiltered()
         cl = repo.changelog
-        self._phasesets = [set() for phase in range(max(allphases) + 1)]
+        self._phasesets = {phase: set() for phase in allphases}
         lowerroots = set()
         for phase in reversed(trackedphases):
             roots = pycompat.maplist(cl.rev, self.phaseroots[phase])
@@ -493,7 +494,7 @@
             f.close()
 
     def _write(self, fp):
-        for phase, roots in enumerate(self.phaseroots):
+        for phase, roots in pycompat.iteritems(self.phaseroots):
             for h in sorted(roots):
                 fp.write(b'%i %s\n' % (phase, hex(h)))
         self.dirty = False
@@ -575,7 +576,11 @@
         return changes
 
     def retractboundary(self, repo, tr, targetphase, nodes):
-        oldroots = self.phaseroots[: targetphase + 1]
+        oldroots = {
+            phase: revs
+            for phase, revs in pycompat.iteritems(self.phaseroots)
+            if phase <= targetphase
+        }
         if tr is None:
             phasetracking = None
         else:
@@ -594,7 +599,7 @@
             # find the phase of the affected revision
             for phase in pycompat.xrange(targetphase, -1, -1):
                 if phase:
-                    roots = oldroots[phase]
+                    roots = oldroots.get(phase, [])
                     revs = set(repo.revs(b'%ln::%ld', roots, affected))
                     affected -= revs
                 else:  # public phase
@@ -648,7 +653,7 @@
         """
         filtered = False
         has_node = repo.changelog.index.has_node  # to filter unknown nodes
-        for phase, nodes in enumerate(self.phaseroots):
+        for phase, nodes in pycompat.iteritems(self.phaseroots):
             missing = sorted(node for node in nodes if not has_node(node))
             if missing:
                 for mnode in missing:
--- a/mercurial/policy.py	Tue Jul 07 14:01:12 2020 +0530
+++ b/mercurial/policy.py	Wed Jul 08 00:36:36 2020 +0200
@@ -80,7 +80,7 @@
     ('cext', 'bdiff'): 3,
     ('cext', 'mpatch'): 1,
     ('cext', 'osutil'): 4,
-    ('cext', 'parsers'): 16,
+    ('cext', 'parsers'): 17,
 }
 
 # map import request to other package or module
--- a/tests/test-parseindex.t	Tue Jul 07 14:01:12 2020 +0530
+++ b/tests/test-parseindex.t	Wed Jul 08 00:36:36 2020 +0200
@@ -185,7 +185,7 @@
   > ops = [
   >     ('reachableroots',
   >      lambda: cl.index.reachableroots2(0, [1], [0], False)),
-  >     ('compute_phases_map_sets', lambda: cl.computephases([[0], []])),
+  >     ('compute_phases_map_sets', lambda: cl.computephases({1: {cl.node(0)}})),
   >     ('index_headrevs', lambda: cl.headrevs()),
   >     ('find_gca_candidates', lambda: cl.commonancestorsheads(n0, n1)),
   >     ('find_deepest', lambda: cl.ancestor(n0, n1)),