# HG changeset patch # User Joerg Sonnenberger # Date 1594161396 -7200 # Node ID 61e7464477acd83bdc6f68b7b6331ad88333e3de # Parent 33524b6bef53fffcb34708e927c2b53f08fea736 phases: sparsify phaseroots and phasesets As final step of dealing with the holes in the phase numbers, make phaseroots and phasesets both dictionaries indexed by the phase number. Further adjust the interface of the C module by pushing the node to revision mapping down as it is cheaper on the C side to deal with revision numbers. Overall, the patch series improves a no-change "hg up" for my NetBSD test repository from 4.7s to 1.3s. Differential Revision: https://phab.mercurial-scm.org/D8698 diff -r 33524b6bef53 -r 61e7464477ac mercurial/cext/parsers.c --- a/mercurial/cext/parsers.c Tue Jul 07 14:01:12 2020 +0530 +++ b/mercurial/cext/parsers.c Wed Jul 08 00:36:36 2020 +0200 @@ -667,7 +667,7 @@ void manifest_module_init(PyObject *mod); void revlog_module_init(PyObject *mod); -static const int version = 16; +static const int version = 17; static void module_init(PyObject *mod) { diff -r 33524b6bef53 -r 61e7464477ac mercurial/cext/revlog.c --- a/mercurial/cext/revlog.c Tue Jul 07 14:01:12 2020 +0530 +++ b/mercurial/cext/revlog.c Wed Jul 08 00:36:36 2020 +0200 @@ -109,6 +109,9 @@ static Py_ssize_t inline_scan(indexObject *self, const char **offsets); +static int index_find_node(indexObject *self, const char *node, + Py_ssize_t nodelen); + #if LONG_MAX == 0x7fffffffL static const char *const tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#"); #else @@ -577,34 +580,6 @@ } } -static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list, - Py_ssize_t marker, char *phases) -{ - PyObject *iter = NULL; - PyObject *iter_item = NULL; - Py_ssize_t min_idx = index_length(self) + 2; - long iter_item_long; - - if (PyList_GET_SIZE(list) != 0) { - iter = PyObject_GetIter(list); - if (iter == NULL) - return -2; - while ((iter_item = PyIter_Next(iter))) { - if (!pylong_to_long(iter_item, &iter_item_long)) { - Py_DECREF(iter_item); - return -2; - } - Py_DECREF(iter_item); - if (iter_item_long < min_idx) - min_idx = iter_item_long; - phases[iter_item_long] = (char)marker; - } - Py_DECREF(iter); - } - - return min_idx; -} - static inline void set_phase_from_parents(char *phases, int parent_1, int parent_2, Py_ssize_t i) { @@ -773,99 +748,165 @@ return NULL; } +static int add_roots_get_min(indexObject *self, PyObject *roots, char *phases, + char phase) +{ + Py_ssize_t len = index_length(self); + PyObject *iter; + PyObject *item; + PyObject *iterator; + int rev, minrev = -1; + char *node; + + if (!PySet_Check(roots)) + return -2; + iterator = PyObject_GetIter(roots); + if (iterator == NULL) + return -2; + while ((item = PyIter_Next(iterator))) { + if (node_check(item, &node) == -1) + goto failed; + rev = index_find_node(self, node, 20); + /* null is implicitly public, so negative is invalid */ + if (rev < 0 || rev >= len) + goto failed; + phases[rev] = phase; + if (minrev == -1 || minrev > rev) + minrev = rev; + Py_DECREF(item); + } + Py_DECREF(iterator); + return minrev; +failed: + Py_DECREF(iterator); + Py_DECREF(item); + return -2; +} + static PyObject *compute_phases_map_sets(indexObject *self, PyObject *args) { - PyObject *roots = Py_None; + /* 0: public (untracked), 1: draft, 2: secret, 32: archive, + 96: internal */ + static const char trackedphases[] = {1, 2, 32, 96}; PyObject *ret = NULL; - PyObject *phasessize = NULL; + PyObject *roots = Py_None; + PyObject *idx = NULL; + PyObject *pyphase = NULL; + PyObject *pyrev = NULL; PyObject *phaseroots = NULL; - PyObject *phaseset = NULL; - PyObject *phasessetlist = NULL; - PyObject *rev = NULL; + PyObject *phasessize = NULL; + PyObject *phasesets[4] = {NULL, NULL, NULL, NULL}; Py_ssize_t len = index_length(self); - Py_ssize_t numphase = 0; - Py_ssize_t minrevallphases = 0; - Py_ssize_t minrevphase = 0; - Py_ssize_t i = 0; + const char *currentphase; char *phases = NULL; - long phase; + int minphaserev = -1, rev, i; + const int numphases = (int)(sizeof(phasesets) / sizeof(phasesets[0])); if (!PyArg_ParseTuple(args, "O", &roots)) - goto done; - if (roots == NULL || !PyList_Check(roots)) { - PyErr_SetString(PyExc_TypeError, "roots must be a list"); - goto done; + return NULL; + if (roots == NULL || !PyDict_Check(roots)) { + PyErr_SetString(PyExc_TypeError, "roots must be a dictionary"); + return NULL; } - phases = calloc( - len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */ + phases = calloc(len, 1); if (phases == NULL) { PyErr_NoMemory(); - goto done; + return NULL; } - /* Put the phase information of all the roots in phases */ - numphase = PyList_GET_SIZE(roots) + 1; - minrevallphases = len + 1; - phasessetlist = PyList_New(numphase); - if (phasessetlist == NULL) - goto done; + + for (i = 0; i < numphases; ++i) { + pyphase = PyInt_FromLong(trackedphases[i]); + if (pyphase == NULL) + goto release; + phaseroots = PyDict_GetItem(roots, pyphase); + Py_DECREF(pyphase); + if (phaseroots == NULL) + continue; + rev = add_roots_get_min(self, phaseroots, phases, trackedphases[i]); + phaseroots = NULL; + if (rev == -2) + goto release; + if (rev != -1 && (minphaserev == -1 || rev < minphaserev)) + minphaserev = rev; + } + + for (i = 0; i < numphases; ++i) { + phasesets[i] = PySet_New(NULL); + if (phasesets[i] == NULL) + goto release; + } - PyList_SET_ITEM(phasessetlist, 0, Py_None); - Py_INCREF(Py_None); - - for (i = 0; i < numphase - 1; i++) { - phaseroots = PyList_GET_ITEM(roots, i); - phaseset = PySet_New(NULL); - if (phaseset == NULL) + if (minphaserev == -1) + minphaserev = len; + for (rev = minphaserev; rev < len; ++rev) { + int parents[2]; + /* + * The parent lookup could be skipped for phaseroots, but + * phase --force would historically not recompute them + * correctly, leaving descendents with a lower phase around. + * As such, unconditionally recompute the phase. + */ + if (index_get_parents(self, rev, parents, (int)len - 1) < 0) goto release; - PyList_SET_ITEM(phasessetlist, i + 1, phaseset); - if (!PyList_Check(phaseroots)) { - PyErr_SetString(PyExc_TypeError, - "roots item must be a list"); + set_phase_from_parents(phases, parents[0], parents[1], rev); + switch (phases[rev]) { + case 0: + continue; + case 1: + pyphase = phasesets[0]; + break; + case 2: + pyphase = phasesets[1]; + break; + case 32: + pyphase = phasesets[2]; + break; + case 96: + pyphase = phasesets[3]; + break; + default: goto release; } - minrevphase = - add_roots_get_min(self, phaseroots, i + 1, phases); - if (minrevphase == -2) /* Error from add_roots_get_min */ + pyrev = PyInt_FromLong(rev); + if (pyrev == NULL) goto release; - minrevallphases = MIN(minrevallphases, minrevphase); + if (PySet_Add(pyphase, pyrev) == -1) { + Py_DECREF(pyrev); + goto release; + } + Py_DECREF(pyrev); } - /* Propagate the phase information from the roots to the revs */ - if (minrevallphases != -1) { - int parents[2]; - for (i = minrevallphases; i < len; i++) { - if (index_get_parents(self, i, parents, (int)len - 1) < - 0) - goto release; - set_phase_from_parents(phases, parents[0], parents[1], - i); + phaseroots = _dict_new_presized(numphases); + if (phaseroots == NULL) + goto release; + for (int i = 0; i < numphases; ++i) { + pyphase = PyInt_FromLong(trackedphases[i]); + if (pyphase == NULL) + goto release; + if (PyDict_SetItem(phaseroots, pyphase, phasesets[i]) == -1) { + Py_DECREF(pyphase); + goto release; } + Py_DECREF(phasesets[i]); + phasesets[i] = NULL; } - /* Transform phase list to a python list */ phasessize = PyInt_FromSsize_t(len); if (phasessize == NULL) goto release; - for (i = 0; i < len; i++) { - phase = phases[i]; - /* We only store the sets of phase for non public phase, the - * public phase is computed as a difference */ - if (phase != 0) { - phaseset = PyList_GET_ITEM(phasessetlist, phase); - rev = PyInt_FromSsize_t(i); - if (rev == NULL) - goto release; - PySet_Add(phaseset, rev); - Py_XDECREF(rev); - } - } - ret = PyTuple_Pack(2, phasessize, phasessetlist); + + ret = PyTuple_Pack(2, phasessize, phaseroots); + Py_DECREF(phasessize); + Py_DECREF(phaseroots); + return ret; release: - Py_XDECREF(phasessize); - Py_XDECREF(phasessetlist); -done: + for (i = 0; i < numphases; ++i) + Py_XDECREF(phasesets[i]); + Py_XDECREF(phaseroots); + free(phases); - return ret; + return NULL; } static PyObject *index_headrevs(indexObject *self, PyObject *args) diff -r 33524b6bef53 -r 61e7464477ac mercurial/phases.py --- a/mercurial/phases.py Tue Jul 07 14:01:12 2020 +0530 +++ b/mercurial/phases.py Wed Jul 08 00:36:36 2020 +0200 @@ -170,7 +170,7 @@ """ repo = repo.unfiltered() dirty = False - roots = [set() for i in range(max(allphases) + 1)] + roots = {i: set() for i in allphases} try: f, pending = txnutil.trypending(repo.root, repo.svfs, b'phaseroots') try: @@ -333,7 +333,11 @@ if len(cl) >= self._loadedrevslen: self.invalidate() self.loadphaserevs(repo) - return any(self.phaseroots[1:]) + return any( + revs + for phase, revs in pycompat.iteritems(self.phaseroots) + if phase != public + ) def nonpublicphaseroots(self, repo): """returns the roots of all non-public phases @@ -346,7 +350,13 @@ if len(cl) >= self._loadedrevslen: self.invalidate() self.loadphaserevs(repo) - return set().union(*[roots for roots in self.phaseroots[1:] if roots]) + return set().union( + *[ + revs + for phase, revs in pycompat.iteritems(self.phaseroots) + if phase != public + ] + ) def getrevset(self, repo, phases, subset=None): """return a smartset for the given phases""" @@ -405,7 +415,7 @@ # Shallow copy meant to ensure isolation in # advance/retractboundary(), nothing more. ph = self.__class__(None, None, _load=False) - ph.phaseroots = self.phaseroots[:] + ph.phaseroots = self.phaseroots.copy() ph.dirty = self.dirty ph.opener = self.opener ph._loadedrevslen = self._loadedrevslen @@ -425,21 +435,12 @@ def _getphaserevsnative(self, repo): repo = repo.unfiltered() - nativeroots = [] - for phase in trackedphases: - nativeroots.append( - pycompat.maplist(repo.changelog.rev, self.phaseroots[phase]) - ) - revslen, phasesets = repo.changelog.computephases(nativeroots) - phasesets2 = [set() for phase in range(max(allphases) + 1)] - for phase, phaseset in zip(allphases, phasesets): - phasesets2[phase] = phaseset - return revslen, phasesets2 + return repo.changelog.computephases(self.phaseroots) def _computephaserevspure(self, repo): repo = repo.unfiltered() cl = repo.changelog - self._phasesets = [set() for phase in range(max(allphases) + 1)] + self._phasesets = {phase: set() for phase in allphases} lowerroots = set() for phase in reversed(trackedphases): roots = pycompat.maplist(cl.rev, self.phaseroots[phase]) @@ -493,7 +494,7 @@ f.close() def _write(self, fp): - for phase, roots in enumerate(self.phaseroots): + for phase, roots in pycompat.iteritems(self.phaseroots): for h in sorted(roots): fp.write(b'%i %s\n' % (phase, hex(h))) self.dirty = False @@ -575,7 +576,11 @@ return changes def retractboundary(self, repo, tr, targetphase, nodes): - oldroots = self.phaseroots[: targetphase + 1] + oldroots = { + phase: revs + for phase, revs in pycompat.iteritems(self.phaseroots) + if phase <= targetphase + } if tr is None: phasetracking = None else: @@ -594,7 +599,7 @@ # find the phase of the affected revision for phase in pycompat.xrange(targetphase, -1, -1): if phase: - roots = oldroots[phase] + roots = oldroots.get(phase, []) revs = set(repo.revs(b'%ln::%ld', roots, affected)) affected -= revs else: # public phase @@ -648,7 +653,7 @@ """ filtered = False has_node = repo.changelog.index.has_node # to filter unknown nodes - for phase, nodes in enumerate(self.phaseroots): + for phase, nodes in pycompat.iteritems(self.phaseroots): missing = sorted(node for node in nodes if not has_node(node)) if missing: for mnode in missing: diff -r 33524b6bef53 -r 61e7464477ac mercurial/policy.py --- a/mercurial/policy.py Tue Jul 07 14:01:12 2020 +0530 +++ b/mercurial/policy.py Wed Jul 08 00:36:36 2020 +0200 @@ -80,7 +80,7 @@ ('cext', 'bdiff'): 3, ('cext', 'mpatch'): 1, ('cext', 'osutil'): 4, - ('cext', 'parsers'): 16, + ('cext', 'parsers'): 17, } # map import request to other package or module diff -r 33524b6bef53 -r 61e7464477ac tests/test-parseindex.t --- a/tests/test-parseindex.t Tue Jul 07 14:01:12 2020 +0530 +++ b/tests/test-parseindex.t Wed Jul 08 00:36:36 2020 +0200 @@ -185,7 +185,7 @@ > ops = [ > ('reachableroots', > lambda: cl.index.reachableroots2(0, [1], [0], False)), - > ('compute_phases_map_sets', lambda: cl.computephases([[0], []])), + > ('compute_phases_map_sets', lambda: cl.computephases({1: {cl.node(0)}})), > ('index_headrevs', lambda: cl.headrevs()), > ('find_gca_candidates', lambda: cl.commonancestorsheads(n0, n1)), > ('find_deepest', lambda: cl.ancestor(n0, n1)),