revlog: only build the nodemap on demand
author Matt Mackall <mpm@selenic.com>
Tue, 11 Jan 2011 17:01:04 -0600
changeset 13254 5ef5eb1f3515
parent 13253 61c9bc3da402
child 13255 2696730ca233
revlog: only build the nodemap on demand
contrib/perf.py
mercurial/parsers.c
mercurial/revlog.py
tests/test-parseindex2.py
--- a/contrib/perf.py	Tue Jan 04 14:12:52 2011 -0600
+++ b/contrib/perf.py	Tue Jan 11 17:01:04 2011 -0600
@@ -81,8 +81,9 @@
 
 def perfindex(ui, repo):
     import mercurial.changelog
+    n = repo["tip"].node()
     def d():
-        t = repo.changelog.tip()
+        repo[n]
         repo.invalidate()
     timer(d)
 
--- a/mercurial/parsers.c	Tue Jan 04 14:12:52 2011 -0600
+++ b/mercurial/parsers.c	Tue Jan 11 17:01:04 2011 -0600
@@ -244,41 +244,6 @@
 const char nullid[20];
 const int nullrev = -1;
 
-/* create an index tuple, insert into the nodemap */
-static PyObject * _build_idx_entry(PyObject *nodemap, int n, uint64_t offset_flags,
-                                   int comp_len, int uncomp_len, int base_rev,
-                                   int link_rev, int parent_1, int parent_2,
-                                   const char *c_node_id)
-{
-	int err;
-	PyObject *entry, *node_id, *n_obj;
-
-	node_id = PyBytes_FromStringAndSize(c_node_id, 20);
-	n_obj = PyInt_FromLong(n);
-
-	if (!node_id || !n_obj)
-		err = -1;
-	else
-		err = PyDict_SetItem(nodemap, node_id, n_obj);
-
-	Py_XDECREF(n_obj);
-	if (err)
-		goto error_dealloc;
-
-	entry = Py_BuildValue("LiiiiiiN", offset_flags, comp_len,
-			      uncomp_len, base_rev, link_rev,
-			      parent_1, parent_2, node_id);
-	if (!entry)
-		goto error_dealloc;
-	PyObject_GC_UnTrack(entry); /* don't waste time with this */
-
-	return entry;
-
-error_dealloc:
-	Py_XDECREF(node_id);
-	return NULL;
-}
-
 /* RevlogNG format (all in big endian, data may be inlined):
  *    6 bytes: offset
  *    2 bytes: flags
@@ -290,8 +255,8 @@
  *    4 bytes: parent 2 revision
  *   32 bytes: nodeid (only 20 bytes used)
  */
-static int _parse_index_ng (const char *data, int size, int inlined,
-			    PyObject *index, PyObject *nodemap)
+static int _parse_index_ng(const char *data, int size, int inlined,
+			   PyObject *index)
 {
 	PyObject *entry;
 	int n = 0, err;
@@ -321,13 +286,15 @@
 		parent_2 = ntohl(*((uint32_t *)(decode + 28)));
 		c_node_id = decode + 32;
 
-		entry = _build_idx_entry(nodemap, n, offset_flags,
-					comp_len, uncomp_len, base_rev,
-					link_rev, parent_1, parent_2,
-					c_node_id);
+		entry = Py_BuildValue("Liiiiiis#", offset_flags, comp_len,
+			      uncomp_len, base_rev, link_rev,
+			      parent_1, parent_2, c_node_id, 20);
+
 		if (!entry)
 			return 0;
 
+		PyObject_GC_UnTrack(entry); /* don't waste time with this */
+
 		if (inlined) {
 			err = PyList_Append(index, entry);
 			Py_DECREF(entry);
@@ -348,12 +315,14 @@
 		return 0;
 	}
 
-	/* create the nullid/nullrev entry in the nodemap and the
-	 * magic nullid entry in the index at [-1] */
-	entry = _build_idx_entry(nodemap,
-			nullrev, 0, 0, 0, -1, -1, -1, -1, nullid);
+	/* create the magic nullid entry in the index at [-1] */
+	entry = Py_BuildValue("Liiiiiis#", (uint64_t)0, 0, 0, -1, -1, -1, -1, nullid, 20);
+
 	if (!entry)
 		return 0;
+
+	PyObject_GC_UnTrack(entry); /* don't waste time with this */
+
 	if (inlined) {
 		err = PyList_Append(index, entry);
 		Py_DECREF(entry);
@@ -366,17 +335,16 @@
 }
 
 /* This function parses a index file and returns a Python tuple of the
- * following format: (index, nodemap, cache)
+ * following format: (index, cache)
  *
  * index: a list of tuples containing the RevlogNG records
- * nodemap: a dict mapping node ids to indices in the index list
  * cache: if data is inlined, a tuple (index_file_content, 0) else None
  */
-static PyObject *parse_index(PyObject *self, PyObject *args)
+static PyObject *parse_index2(PyObject *self, PyObject *args)
 {
 	const char *data;
 	int size, inlined;
-	PyObject *rval = NULL, *index = NULL, *nodemap = NULL, *cache = NULL;
+	PyObject *rval = NULL, *index = NULL, *cache = NULL;
 	PyObject *data_obj = NULL, *inlined_obj;
 
 	if (!PyArg_ParseTuple(args, "s#O", &data, &size, &inlined_obj))
@@ -390,10 +358,6 @@
 	if (!index)
 		goto quit;
 
-	nodemap = PyDict_New();
-	if (!nodemap)
-		goto quit;
-
 	/* set up the cache return value */
 	if (inlined) {
 		/* Note that the reference to data_obj is only borrowed */
@@ -406,18 +370,17 @@
 		Py_INCREF(Py_None);
 	}
 
-	/* actually populate the index and the nodemap with data */
-	if (!_parse_index_ng (data, size, inlined, index, nodemap))
+	/* actually populate the index with data */
+	if (!_parse_index_ng(data, size, inlined, index))
 		goto quit;
 
-	rval = Py_BuildValue("NNN", index, nodemap, cache);
+	rval = Py_BuildValue("NN", index, cache);
 	if (!rval)
 		goto quit;
 	return rval;
 
 quit:
 	Py_XDECREF(index);
-	Py_XDECREF(nodemap);
 	Py_XDECREF(cache);
 	Py_XDECREF(rval);
 	return NULL;
@@ -429,7 +392,7 @@
 static PyMethodDef methods[] = {
 	{"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
 	{"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
-	{"parse_index", parse_index, METH_VARARGS, "parse a revlog index\n"},
+	{"parse_index2", parse_index2, METH_VARARGS, "parse a revlog index\n"},
 	{NULL, NULL}
 };
 
--- a/mercurial/revlog.py	Tue Jan 04 14:12:52 2011 -0600
+++ b/mercurial/revlog.py	Tue Jan 11 17:01:04 2011 -0600
@@ -172,8 +172,8 @@
 
     def parseindex(self, fp, data, inline):
         # call the C implementation to parse the index data
-        index, nodemap, cache = parsers.parse_index(data, inline)
-        return index, nodemap, cache
+        index, cache = parsers.parse_index2(data, inline)
+        return index, None, cache
 
     def packentry(self, entry, node, version, rev):
         p = _pack(indexformatng, *entry)
@@ -218,7 +218,6 @@
         self.opener = opener
         self._cache = None
         self._chunkcache = (0, '')
-        self.nodemap = {nullid: nullrev}
         self.index = []
         self._shallowroot = shallowroot
         self._parentdelta = 0
@@ -267,7 +266,9 @@
                 d = self._io.parseindex(f, i, self._inline)
             except (ValueError, IndexError):
                 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
-            self.index, self.nodemap, self._chunkcache = d
+            self.index, n, self._chunkcache = d
+            if n:
+                self.nodemap = n
             if not self._chunkcache:
                 self._chunkclear()
 
@@ -275,6 +276,14 @@
         if self.index == [] or self.index[-1][7] != nullid:
             self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
 
+    @util.propertycache
+    def nodemap(self):
+        n = {nullid: nullrev}
+        i = self.index
+        for r in xrange(len(i) - 1):
+            n[i[r][7]] = r
+        return n
+
     def tip(self):
         return self.node(len(self.index) - 2)
     def __len__(self):
--- a/tests/test-parseindex2.py	Tue Jan 04 14:12:52 2011 -0600
+++ b/tests/test-parseindex2.py	Tue Jan 11 17:01:04 2011 -0600
@@ -50,7 +50,7 @@
     # add the magic null revision at -1
     index.append((0, 0, 0, -1, -1, -1, -1, nullid))
 
-    return index, nodemap, cache
+    return index, cache
 
 
 data_inlined = '\x00\x01\x00\x01\x00\x00\x00\x00\x00\x00\x01\x8c' \
@@ -97,10 +97,10 @@
 def runtest() :
 
     py_res_1 = py_parseindex(data_inlined, True)
-    c_res_1 = parsers.parse_index(data_inlined, True)
+    c_res_1 = parsers.parse_index2(data_inlined, True)
 
     py_res_2 = py_parseindex(data_non_inlined, False)
-    c_res_2 = parsers.parse_index(data_non_inlined, False)
+    c_res_2 = parsers.parse_index2(data_non_inlined, False)
 
     if py_res_1 != c_res_1:
         print "Parse index result (with inlined data) differs!"