revlog: use node tree (native code) for shortest() calculation
authorMartin von Zweigbergk <martinvonz@google.com>
Wed, 02 May 2018 23:17:58 -0700
changeset 37968 0304f22497fa
parent 37967 7932be8b0559
child 37969 0db7fe7c34d3
revlog: use node tree (native code) for shortest() calculation I want to rewrite revlog.shortest() to disambiguate only among hex nodeids and then disambiguate the result with revnums at a higher level (in scmutil). However, that would slow down `hg log -T '{shortest(node,1)}\n'` from 5.0s to 6.8s, which I wasn't sure would be acceptable. So this patch makes revlog.shortest() use the node tree for finding the length of the shortest prefix that's unambiguous among nodeids. Once that has been found, it makes it longer until it is also not ambiguous with a revnum. This speeds up `hg log -T '{shortest(node,1)}\n'` from 5.0s to 4.0s. Differential Revision: https://phab.mercurial-scm.org/D3499
mercurial/cext/parsers.c
mercurial/cext/revlog.c
mercurial/policy.py
mercurial/revlog.py
--- a/mercurial/cext/parsers.c	Mon May 07 16:49:31 2018 -0700
+++ b/mercurial/cext/parsers.c	Wed May 02 23:17:58 2018 -0700
@@ -713,7 +713,7 @@
 void manifest_module_init(PyObject *mod);
 void revlog_module_init(PyObject *mod);
 
-static const int version = 4;
+static const int version = 5;
 
 static void module_init(PyObject *mod)
 {
--- a/mercurial/cext/revlog.c	Mon May 07 16:49:31 2018 -0700
+++ b/mercurial/cext/revlog.c	Wed May 02 23:17:58 2018 -0700
@@ -1259,6 +1259,55 @@
 	return nt_find(self, node, nodelen, 1);
 }
 
+/*
+ * Find the length of the shortest unique prefix of node.
+ *
+ * Return values:
+ *
+ *   -3: error (exception set)
+ *   -2: not found (no exception set)
+ * rest: length of shortest prefix
+ */
+static int nt_shortest(indexObject *self, const char *node)
+{
+	int level, off;
+
+	if (nt_init(self) == -1)
+		return -3;
+	if (nt_populate(self) == -1)
+		return -3;
+
+	for (level = off = 0; level < 40; level++) {
+		int k, v;
+		nodetree *n = &self->nt[off];
+		k = nt_level(node, level);
+		v = n->children[k];
+		if (v < 0) {
+			const char *n;
+			v = -(v + 1);
+			n = index_node(self, v);
+			if (memcmp(node, n, 20) != 0)
+				/*
+				 * Found a unique prefix, but it wasn't for the
+				 * requested node (i.e the requested node does
+				 * not exist).
+				 */
+				return -2;
+			return level + 1;
+		}
+		if (v == 0)
+			return -2;
+		off = v;
+	}
+	/*
+	 * The node was still not unique after 40 hex digits, so this won't
+	 * happen. Also, if we get here, then there's a programming error in
+	 * this file that made us insert a node longer than 40 hex digits.
+	 */
+	PyErr_SetString(PyExc_Exception, "broken node tree");
+	return -3;
+}
+
 static PyObject *index_partialmatch(indexObject *self, PyObject *args)
 {
 	const char *fullnode;
@@ -1307,6 +1356,29 @@
 	return PyBytes_FromStringAndSize(fullnode, 20);
 }
 
+static PyObject *index_shortest(indexObject *self, PyObject *args)
+{
+	Py_ssize_t nodelen;
+	PyObject *val;
+	char *node;
+	int length;
+
+	if (!PyArg_ParseTuple(args, "O", &val))
+		return NULL;
+	if (node_check(val, &node, &nodelen) == -1)
+		return NULL;
+
+	self->ntlookups++;
+	length = nt_shortest(self, node);
+	if (length == -3)
+		return NULL;
+	if (length == -2) {
+		raise_revlog_error();
+		return NULL;
+	}
+	return PyInt_FromLong(length);
+}
+
 static PyObject *index_m_get(indexObject *self, PyObject *args)
 {
 	Py_ssize_t nodelen;
@@ -1995,6 +2067,8 @@
 	 "insert an index entry"},
 	{"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS,
 	 "match a potentially ambiguous node ID"},
+	{"shortest", (PyCFunction)index_shortest, METH_VARARGS,
+	 "find length of shortest hex nodeid of a binary ID"},
 	{"stats", (PyCFunction)index_stats, METH_NOARGS,
 	 "stats for the index"},
 	{NULL} /* Sentinel */
--- a/mercurial/policy.py	Mon May 07 16:49:31 2018 -0700
+++ b/mercurial/policy.py	Wed May 02 23:17:58 2018 -0700
@@ -69,7 +69,7 @@
     (r'cext', r'bdiff'): 3,
     (r'cext', r'mpatch'): 1,
     (r'cext', r'osutil'): 4,
-    (r'cext', r'parsers'): 4,
+    (r'cext', r'parsers'): 5,
 }
 
 # map import request to other package or module
--- a/mercurial/revlog.py	Mon May 07 16:49:31 2018 -0700
+++ b/mercurial/revlog.py	Wed May 02 23:17:58 2018 -0700
@@ -1526,7 +1526,33 @@
                 raise LookupError(node, self.indexfile, _('no node'))
             return not isrev(prefix)
 
+        def maybewdir(prefix):
+            return all(c == 'f' for c in prefix)
+
         hexnode = hex(node)
+
+        def disambiguate(hexnode, minlength):
+            for length in range(minlength, 41):
+                prefix = hexnode[:length]
+                if not isrev(prefix) and not maybewdir(prefix):
+                    return prefix
+
+        if not getattr(self, 'filteredrevs', None):
+            try:
+                length = max(self.index.shortest(node), minlength)
+                return disambiguate(hexnode, length)
+            except RevlogError:
+                if node == wdirid:
+                    for length in range(minlength, 41):
+                        prefix = hexnode[:length]
+                        if isvalid(prefix):
+                            return prefix
+                else:
+                    raise LookupError(node, self.indexfile, _('no node'))
+            except AttributeError:
+                # Fall through to pure code
+                pass
+
         shortest = hexnode
         startlength = max(6, minlength)
         length = startlength