mercurial/cext/manifest.c
changeset 32372 df448de7cf3b
parent 30168 1a327889c13c
child 33757 e9996bd7203f
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/cext/manifest.c	Sat Aug 13 12:23:56 2016 +0900
@@ -0,0 +1,939 @@
+/*
+ * manifest.c - manifest type that does on-demand parsing.
+ *
+ * Copyright 2015, Google Inc.
+ *
+ * This software may be used and distributed according to the terms of
+ * the GNU General Public License, incorporated herein by reference.
+ */
+#include <Python.h>
+
+#include <assert.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "util.h"
+
+#define DEFAULT_LINES 100000
+
+typedef struct {
+	char *start;
+	Py_ssize_t len; /* length of line including terminal newline */
+	char hash_suffix;
+	bool from_malloc;
+	bool deleted;
+} line;
+
+typedef struct {
+	PyObject_HEAD
+	PyObject *pydata;
+	line *lines;
+	int numlines; /* number of line entries */
+	int livelines; /* number of non-deleted lines */
+	int maxlines; /* allocated number of lines */
+	bool dirty;
+} lazymanifest;
+
+#define MANIFEST_OOM -1
+#define MANIFEST_NOT_SORTED -2
+#define MANIFEST_MALFORMED -3
+
+/* defined in parsers.c */
+PyObject *unhexlify(const char *str, int len);
+
+/* get the length of the path for a line */
+static size_t pathlen(line *l) {
+	return strlen(l->start);
+}
+
+/* get the node value of a single line */
+static PyObject *nodeof(line *l) {
+	char *s = l->start;
+	ssize_t llen = pathlen(l);
+	PyObject *hash = unhexlify(s + llen + 1, 40);
+	if (!hash) {
+		return NULL;
+	}
+	if (l->hash_suffix != '\0') {
+		char newhash[21];
+		memcpy(newhash, PyBytes_AsString(hash), 20);
+		Py_DECREF(hash);
+		newhash[20] = l->hash_suffix;
+		hash = PyBytes_FromStringAndSize(newhash, 21);
+	}
+	return hash;
+}
+
+/* get the node hash and flags of a line as a tuple */
+static PyObject *hashflags(line *l)
+{
+	char *s = l->start;
+	size_t plen = pathlen(l);
+	PyObject *hash = nodeof(l);
+
+	/* 40 for hash, 1 for null byte, 1 for newline */
+	size_t hplen = plen + 42;
+	Py_ssize_t flen = l->len - hplen;
+	PyObject *flags;
+	PyObject *tup;
+
+	if (!hash)
+		return NULL;
+	flags = PyBytes_FromStringAndSize(s + hplen - 1, flen);
+	if (!flags) {
+		Py_DECREF(hash);
+		return NULL;
+	}
+	tup = PyTuple_Pack(2, hash, flags);
+	Py_DECREF(flags);
+	Py_DECREF(hash);
+	return tup;
+}
+
+/* if we're about to run out of space in the line index, add more */
+static bool realloc_if_full(lazymanifest *self)
+{
+	if (self->numlines == self->maxlines) {
+		self->maxlines *= 2;
+		self->lines = realloc(self->lines, self->maxlines * sizeof(line));
+	}
+	return !!self->lines;
+}
+
+/*
+ * Find the line boundaries in the manifest that 'data' points to and store
+ * information about each line in 'self'.
+ */
+static int find_lines(lazymanifest *self, char *data, Py_ssize_t len)
+{
+	char *prev = NULL;
+	while (len > 0) {
+		line *l;
+		char *next = memchr(data, '\n', len);
+		if (!next) {
+			return MANIFEST_MALFORMED;
+		}
+		next++; /* advance past newline */
+		if (!realloc_if_full(self)) {
+			return MANIFEST_OOM; /* no memory */
+		}
+		if (prev && strcmp(prev, data) > -1) {
+			/* This data isn't sorted, so we have to abort. */
+			return MANIFEST_NOT_SORTED;
+		}
+		l = self->lines + ((self->numlines)++);
+		l->start = data;
+		l->len = next - data;
+		l->hash_suffix = '\0';
+		l->from_malloc = false;
+		l->deleted = false;
+		len = len - l->len;
+		prev = data;
+		data = next;
+	}
+	self->livelines = self->numlines;
+	return 0;
+}
+
+static int lazymanifest_init(lazymanifest *self, PyObject *args)
+{
+	char *data;
+	Py_ssize_t len;
+	int err, ret;
+	PyObject *pydata;
+	if (!PyArg_ParseTuple(args, "S", &pydata)) {
+		return -1;
+	}
+	err = PyBytes_AsStringAndSize(pydata, &data, &len);
+
+	self->dirty = false;
+	if (err == -1)
+		return -1;
+	self->pydata = pydata;
+	Py_INCREF(self->pydata);
+	Py_BEGIN_ALLOW_THREADS
+	self->lines = malloc(DEFAULT_LINES * sizeof(line));
+	self->maxlines = DEFAULT_LINES;
+	self->numlines = 0;
+	if (!self->lines)
+		ret = MANIFEST_OOM;
+	else
+		ret = find_lines(self, data, len);
+	Py_END_ALLOW_THREADS
+	switch (ret) {
+	case 0:
+		break;
+	case MANIFEST_OOM:
+		PyErr_NoMemory();
+		break;
+	case MANIFEST_NOT_SORTED:
+		PyErr_Format(PyExc_ValueError,
+			     "Manifest lines not in sorted order.");
+		break;
+	case MANIFEST_MALFORMED:
+		PyErr_Format(PyExc_ValueError,
+			     "Manifest did not end in a newline.");
+		break;
+	default:
+		PyErr_Format(PyExc_ValueError,
+			     "Unknown problem parsing manifest.");
+	}
+	return ret == 0 ? 0 : -1;
+}
+
+static void lazymanifest_dealloc(lazymanifest *self)
+{
+	/* free any extra lines we had to allocate */
+	int i;
+	for (i = 0; i < self->numlines; i++) {
+		if (self->lines[i].from_malloc) {
+			free(self->lines[i].start);
+		}
+	}
+	if (self->lines) {
+		free(self->lines);
+		self->lines = NULL;
+	}
+	if (self->pydata) {
+		Py_DECREF(self->pydata);
+		self->pydata = NULL;
+	}
+	PyObject_Del(self);
+}
+
+/* iteration support */
+
+typedef struct {
+	PyObject_HEAD lazymanifest *m;
+	Py_ssize_t pos;
+} lmIter;
+
+static void lmiter_dealloc(PyObject *o)
+{
+	lmIter *self = (lmIter *)o;
+	Py_DECREF(self->m);
+	PyObject_Del(self);
+}
+
+static line *lmiter_nextline(lmIter *self)
+{
+	do {
+		self->pos++;
+		if (self->pos >= self->m->numlines) {
+			return NULL;
+		}
+		/* skip over deleted manifest entries */
+	} while (self->m->lines[self->pos].deleted);
+	return self->m->lines + self->pos;
+}
+
+static PyObject *lmiter_iterentriesnext(PyObject *o)
+{
+	size_t pl;
+	line *l;
+	Py_ssize_t consumed;
+	PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL;
+	l = lmiter_nextline((lmIter *)o);
+	if (!l) {
+		goto done;
+	}
+	pl = pathlen(l);
+	path = PyBytes_FromStringAndSize(l->start, pl);
+	hash = nodeof(l);
+	consumed = pl + 41;
+	flags = PyBytes_FromStringAndSize(l->start + consumed,
+					   l->len - consumed - 1);
+	if (!path || !hash || !flags) {
+		goto done;
+	}
+	ret = PyTuple_Pack(3, path, hash, flags);
+done:
+	Py_XDECREF(path);
+	Py_XDECREF(hash);
+	Py_XDECREF(flags);
+	return ret;
+}
+
+#ifdef IS_PY3K
+#define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT
+#else
+#define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \
+	| Py_TPFLAGS_HAVE_ITER
+#endif
+
+static PyTypeObject lazymanifestEntriesIterator = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	"parsers.lazymanifest.entriesiterator", /*tp_name */
+	sizeof(lmIter),                  /*tp_basicsize */
+	0,                               /*tp_itemsize */
+	lmiter_dealloc,                  /*tp_dealloc */
+	0,                               /*tp_print */
+	0,                               /*tp_getattr */
+	0,                               /*tp_setattr */
+	0,                               /*tp_compare */
+	0,                               /*tp_repr */
+	0,                               /*tp_as_number */
+	0,                               /*tp_as_sequence */
+	0,                               /*tp_as_mapping */
+	0,                               /*tp_hash */
+	0,                               /*tp_call */
+	0,                               /*tp_str */
+	0,                               /*tp_getattro */
+	0,                               /*tp_setattro */
+	0,                               /*tp_as_buffer */
+	LAZYMANIFESTENTRIESITERATOR_TPFLAGS, /* tp_flags */
+	"Iterator for 3-tuples in a lazymanifest.",  /* tp_doc */
+	0,                               /* tp_traverse */
+	0,                               /* tp_clear */
+	0,                               /* tp_richcompare */
+	0,                               /* tp_weaklistoffset */
+	PyObject_SelfIter,               /* tp_iter: __iter__() method */
+	lmiter_iterentriesnext,          /* tp_iternext: next() method */
+};
+
+static PyObject *lmiter_iterkeysnext(PyObject *o)
+{
+	size_t pl;
+	line *l = lmiter_nextline((lmIter *)o);
+	if (!l) {
+		return NULL;
+	}
+	pl = pathlen(l);
+	return PyBytes_FromStringAndSize(l->start, pl);
+}
+
+#ifdef IS_PY3K
+#define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT
+#else
+#define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \
+	| Py_TPFLAGS_HAVE_ITER
+#endif
+
+static PyTypeObject lazymanifestKeysIterator = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	"parsers.lazymanifest.keysiterator", /*tp_name */
+	sizeof(lmIter),                  /*tp_basicsize */
+	0,                               /*tp_itemsize */
+	lmiter_dealloc,                  /*tp_dealloc */
+	0,                               /*tp_print */
+	0,                               /*tp_getattr */
+	0,                               /*tp_setattr */
+	0,                               /*tp_compare */
+	0,                               /*tp_repr */
+	0,                               /*tp_as_number */
+	0,                               /*tp_as_sequence */
+	0,                               /*tp_as_mapping */
+	0,                               /*tp_hash */
+	0,                               /*tp_call */
+	0,                               /*tp_str */
+	0,                               /*tp_getattro */
+	0,                               /*tp_setattro */
+	0,                               /*tp_as_buffer */
+	LAZYMANIFESTKEYSITERATOR_TPFLAGS, /* tp_flags */
+	"Keys iterator for a lazymanifest.",  /* tp_doc */
+	0,                               /* tp_traverse */
+	0,                               /* tp_clear */
+	0,                               /* tp_richcompare */
+	0,                               /* tp_weaklistoffset */
+	PyObject_SelfIter,               /* tp_iter: __iter__() method */
+	lmiter_iterkeysnext,             /* tp_iternext: next() method */
+};
+
+static lazymanifest *lazymanifest_copy(lazymanifest *self);
+
+static PyObject *lazymanifest_getentriesiter(lazymanifest *self)
+{
+	lmIter *i = NULL;
+	lazymanifest *t = lazymanifest_copy(self);
+	if (!t) {
+		PyErr_NoMemory();
+		return NULL;
+	}
+	i = PyObject_New(lmIter, &lazymanifestEntriesIterator);
+	if (i) {
+		i->m = t;
+		i->pos = -1;
+	} else {
+		Py_DECREF(t);
+		PyErr_NoMemory();
+	}
+	return (PyObject *)i;
+}
+
+static PyObject *lazymanifest_getkeysiter(lazymanifest *self)
+{
+	lmIter *i = NULL;
+	lazymanifest *t = lazymanifest_copy(self);
+	if (!t) {
+		PyErr_NoMemory();
+		return NULL;
+	}
+	i = PyObject_New(lmIter, &lazymanifestKeysIterator);
+	if (i) {
+		i->m = t;
+		i->pos = -1;
+	} else {
+		Py_DECREF(t);
+		PyErr_NoMemory();
+	}
+	return (PyObject *)i;
+}
+
+/* __getitem__ and __setitem__ support */
+
+static Py_ssize_t lazymanifest_size(lazymanifest *self)
+{
+	return self->livelines;
+}
+
+static int linecmp(const void *left, const void *right)
+{
+	return strcmp(((const line *)left)->start,
+		      ((const line *)right)->start);
+}
+
+static PyObject *lazymanifest_getitem(lazymanifest *self, PyObject *key)
+{
+	line needle;
+	line *hit;
+	if (!PyBytes_Check(key)) {
+		PyErr_Format(PyExc_TypeError,
+			     "getitem: manifest keys must be a string.");
+		return NULL;
+	}
+	needle.start = PyBytes_AsString(key);
+	hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
+		      &linecmp);
+	if (!hit || hit->deleted) {
+		PyErr_Format(PyExc_KeyError, "No such manifest entry.");
+		return NULL;
+	}
+	return hashflags(hit);
+}
+
+static int lazymanifest_delitem(lazymanifest *self, PyObject *key)
+{
+	line needle;
+	line *hit;
+	if (!PyBytes_Check(key)) {
+		PyErr_Format(PyExc_TypeError,
+			     "delitem: manifest keys must be a string.");
+		return -1;
+	}
+	needle.start = PyBytes_AsString(key);
+	hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
+		      &linecmp);
+	if (!hit || hit->deleted) {
+		PyErr_Format(PyExc_KeyError,
+			     "Tried to delete nonexistent manifest entry.");
+		return -1;
+	}
+	self->dirty = true;
+	hit->deleted = true;
+	self->livelines--;
+	return 0;
+}
+
+/* Do a binary search for the insertion point for new, creating the
+ * new entry if needed. */
+static int internalsetitem(lazymanifest *self, line *new) {
+	int start = 0, end = self->numlines;
+	while (start < end) {
+		int pos = start + (end - start) / 2;
+		int c = linecmp(new, self->lines + pos);
+		if (c < 0)
+			end = pos;
+		else if (c > 0)
+			start = pos + 1;
+		else {
+			if (self->lines[pos].deleted)
+				self->livelines++;
+			if (self->lines[pos].from_malloc)
+				free(self->lines[pos].start);
+			start = pos;
+			goto finish;
+		}
+	}
+	/* being here means we need to do an insert */
+	if (!realloc_if_full(self)) {
+		PyErr_NoMemory();
+		return -1;
+	}
+	memmove(self->lines + start + 1, self->lines + start,
+		(self->numlines - start) * sizeof(line));
+	self->numlines++;
+	self->livelines++;
+finish:
+	self->lines[start] = *new;
+	self->dirty = true;
+	return 0;
+}
+
+static int lazymanifest_setitem(
+	lazymanifest *self, PyObject *key, PyObject *value)
+{
+	char *path;
+	Py_ssize_t plen;
+	PyObject *pyhash;
+	Py_ssize_t hlen;
+	char *hash;
+	PyObject *pyflags;
+	char *flags;
+	Py_ssize_t flen;
+	size_t dlen;
+	char *dest;
+	int i;
+	line new;
+	if (!PyBytes_Check(key)) {
+		PyErr_Format(PyExc_TypeError,
+			     "setitem: manifest keys must be a string.");
+		return -1;
+	}
+	if (!value) {
+		return lazymanifest_delitem(self, key);
+	}
+	if (!PyTuple_Check(value) || PyTuple_Size(value) != 2) {
+		PyErr_Format(PyExc_TypeError,
+			     "Manifest values must be a tuple of (node, flags).");
+		return -1;
+	}
+	if (PyBytes_AsStringAndSize(key, &path, &plen) == -1) {
+		return -1;
+	}
+
+	pyhash = PyTuple_GetItem(value, 0);
+	if (!PyBytes_Check(pyhash)) {
+		PyErr_Format(PyExc_TypeError,
+			     "node must be a 20-byte string");
+		return -1;
+	}
+	hlen = PyBytes_Size(pyhash);
+	/* Some parts of the codebase try and set 21 or 22
+	 * byte "hash" values in order to perturb things for
+	 * status. We have to preserve at least the 21st
+	 * byte. Sigh. If there's a 22nd byte, we drop it on
+	 * the floor, which works fine.
+	 */
+	if (hlen != 20 && hlen != 21 && hlen != 22) {
+		PyErr_Format(PyExc_TypeError,
+			     "node must be a 20-byte string");
+		return -1;
+	}
+	hash = PyBytes_AsString(pyhash);
+
+	pyflags = PyTuple_GetItem(value, 1);
+	if (!PyBytes_Check(pyflags) || PyBytes_Size(pyflags) > 1) {
+		PyErr_Format(PyExc_TypeError,
+			     "flags must a 0 or 1 byte string");
+		return -1;
+	}
+	if (PyBytes_AsStringAndSize(pyflags, &flags, &flen) == -1) {
+		return -1;
+	}
+	/* one null byte and one newline */
+	dlen = plen + 41 + flen + 1;
+	dest = malloc(dlen);
+	if (!dest) {
+		PyErr_NoMemory();
+		return -1;
+	}
+	memcpy(dest, path, plen + 1);
+	for (i = 0; i < 20; i++) {
+		/* Cast to unsigned, so it will not get sign-extended when promoted
+		 * to int (as is done when passing to a variadic function)
+		 */
+		sprintf(dest + plen + 1 + (i * 2), "%02x", (unsigned char)hash[i]);
+	}
+	memcpy(dest + plen + 41, flags, flen);
+	dest[plen + 41 + flen] = '\n';
+	new.start = dest;
+	new.len = dlen;
+	new.hash_suffix = '\0';
+	if (hlen > 20) {
+		new.hash_suffix = hash[20];
+	}
+	new.from_malloc = true;     /* is `start` a pointer we allocated? */
+	new.deleted = false;        /* is this entry deleted? */
+	if (internalsetitem(self, &new)) {
+		return -1;
+	}
+	return 0;
+}
+
+static PyMappingMethods lazymanifest_mapping_methods = {
+	(lenfunc)lazymanifest_size,             /* mp_length */
+	(binaryfunc)lazymanifest_getitem,       /* mp_subscript */
+	(objobjargproc)lazymanifest_setitem,    /* mp_ass_subscript */
+};
+
+/* sequence methods (important or __contains__ builds an iterator) */
+
+static int lazymanifest_contains(lazymanifest *self, PyObject *key)
+{
+	line needle;
+	line *hit;
+	if (!PyBytes_Check(key)) {
+		/* Our keys are always strings, so if the contains
+		 * check is for a non-string, just return false. */
+		return 0;
+	}
+	needle.start = PyBytes_AsString(key);
+	hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
+		      &linecmp);
+	if (!hit || hit->deleted) {
+		return 0;
+	}
+	return 1;
+}
+
+static PySequenceMethods lazymanifest_seq_meths = {
+	(lenfunc)lazymanifest_size, /* sq_length */
+	0, /* sq_concat */
+	0, /* sq_repeat */
+	0, /* sq_item */
+	0, /* sq_slice */
+	0, /* sq_ass_item */
+	0, /* sq_ass_slice */
+	(objobjproc)lazymanifest_contains, /* sq_contains */
+	0, /* sq_inplace_concat */
+	0, /* sq_inplace_repeat */
+};
+
+
+/* Other methods (copy, diff, etc) */
+static PyTypeObject lazymanifestType;
+
+/* If the manifest has changes, build the new manifest text and reindex it. */
+static int compact(lazymanifest *self) {
+	int i;
+	ssize_t need = 0;
+	char *data;
+	line *src, *dst;
+	PyObject *pydata;
+	if (!self->dirty)
+		return 0;
+	for (i = 0; i < self->numlines; i++) {
+		if (!self->lines[i].deleted) {
+			need += self->lines[i].len;
+		}
+	}
+	pydata = PyBytes_FromStringAndSize(NULL, need);
+	if (!pydata)
+		return -1;
+	data = PyBytes_AsString(pydata);
+	if (!data) {
+		return -1;
+	}
+	src = self->lines;
+	dst = self->lines;
+	for (i = 0; i < self->numlines; i++, src++) {
+		char *tofree = NULL;
+		if (src->from_malloc) {
+			tofree = src->start;
+		}
+		if (!src->deleted) {
+			memcpy(data, src->start, src->len);
+			*dst = *src;
+			dst->start = data;
+			dst->from_malloc = false;
+			data += dst->len;
+			dst++;
+		}
+		free(tofree);
+	}
+	Py_DECREF(self->pydata);
+	self->pydata = pydata;
+	self->numlines = self->livelines;
+	self->dirty = false;
+	return 0;
+}
+
+static PyObject *lazymanifest_text(lazymanifest *self)
+{
+	if (compact(self) != 0) {
+		PyErr_NoMemory();
+		return NULL;
+	}
+	Py_INCREF(self->pydata);
+	return self->pydata;
+}
+
+static lazymanifest *lazymanifest_copy(lazymanifest *self)
+{
+	lazymanifest *copy = NULL;
+	if (compact(self) != 0) {
+		goto nomem;
+	}
+	copy = PyObject_New(lazymanifest, &lazymanifestType);
+	if (!copy) {
+		goto nomem;
+	}
+	copy->numlines = self->numlines;
+	copy->livelines = self->livelines;
+	copy->dirty = false;
+	copy->lines = malloc(self->maxlines *sizeof(line));
+	if (!copy->lines) {
+		goto nomem;
+	}
+	memcpy(copy->lines, self->lines, self->numlines * sizeof(line));
+	copy->maxlines = self->maxlines;
+	copy->pydata = self->pydata;
+	Py_INCREF(copy->pydata);
+	return copy;
+nomem:
+	PyErr_NoMemory();
+	Py_XDECREF(copy);
+	return NULL;
+}
+
+static lazymanifest *lazymanifest_filtercopy(
+	lazymanifest *self, PyObject *matchfn)
+{
+	lazymanifest *copy = NULL;
+	int i;
+	if (!PyCallable_Check(matchfn)) {
+		PyErr_SetString(PyExc_TypeError, "matchfn must be callable");
+		return NULL;
+	}
+	/* compact ourselves first to avoid double-frees later when we
+	 * compact tmp so that it doesn't have random pointers to our
+	 * underlying from_malloc-data (self->pydata is safe) */
+	if (compact(self) != 0) {
+		goto nomem;
+	}
+	copy = PyObject_New(lazymanifest, &lazymanifestType);
+	if (!copy) {
+		goto nomem;
+	}
+	copy->dirty = true;
+	copy->lines = malloc(self->maxlines * sizeof(line));
+	if (!copy->lines) {
+		goto nomem;
+	}
+	copy->maxlines = self->maxlines;
+	copy->numlines = 0;
+	copy->pydata = self->pydata;
+	Py_INCREF(self->pydata);
+	for (i = 0; i < self->numlines; i++) {
+		PyObject *arglist = NULL, *result = NULL;
+		arglist = Py_BuildValue("(s)", self->lines[i].start);
+		if (!arglist) {
+			return NULL;
+		}
+		result = PyObject_CallObject(matchfn, arglist);
+		Py_DECREF(arglist);
+		/* if the callback raised an exception, just let it
+		 * through and give up */
+		if (!result) {
+			free(copy->lines);
+			Py_DECREF(self->pydata);
+			return NULL;
+		}
+		if (PyObject_IsTrue(result)) {
+			assert(!(self->lines[i].from_malloc));
+			copy->lines[copy->numlines++] = self->lines[i];
+		}
+		Py_DECREF(result);
+	}
+	copy->livelines = copy->numlines;
+	return copy;
+nomem:
+	PyErr_NoMemory();
+	Py_XDECREF(copy);
+	return NULL;
+}
+
+static PyObject *lazymanifest_diff(lazymanifest *self, PyObject *args)
+{
+	lazymanifest *other;
+	PyObject *pyclean = NULL;
+	bool listclean;
+	PyObject *emptyTup = NULL, *ret = NULL;
+	PyObject *es;
+	int sneedle = 0, oneedle = 0;
+	if (!PyArg_ParseTuple(args, "O!|O", &lazymanifestType, &other, &pyclean)) {
+		return NULL;
+	}
+	listclean = (!pyclean) ? false : PyObject_IsTrue(pyclean);
+	es = PyBytes_FromString("");
+	if (!es) {
+		goto nomem;
+	}
+	emptyTup = PyTuple_Pack(2, Py_None, es);
+	Py_DECREF(es);
+	if (!emptyTup) {
+		goto nomem;
+	}
+	ret = PyDict_New();
+	if (!ret) {
+		goto nomem;
+	}
+	while (sneedle != self->numlines || oneedle != other->numlines) {
+		line *left = self->lines + sneedle;
+		line *right = other->lines + oneedle;
+		int result;
+		PyObject *key;
+		PyObject *outer;
+		/* If we're looking at a deleted entry and it's not
+		 * the end of the manifest, just skip it. */
+		if (left->deleted && sneedle < self->numlines) {
+			sneedle++;
+			continue;
+		}
+		if (right->deleted && oneedle < other->numlines) {
+			oneedle++;
+			continue;
+		}
+		/* if we're at the end of either manifest, then we
+		 * know the remaining items are adds so we can skip
+		 * the strcmp. */
+		if (sneedle == self->numlines) {
+			result = 1;
+		} else if (oneedle == other->numlines) {
+			result = -1;
+		} else {
+			result = linecmp(left, right);
+		}
+		key = result <= 0 ?
+			PyBytes_FromString(left->start) :
+			PyBytes_FromString(right->start);
+		if (!key)
+			goto nomem;
+		if (result < 0) {
+			PyObject *l = hashflags(left);
+			if (!l) {
+				goto nomem;
+			}
+			outer = PyTuple_Pack(2, l, emptyTup);
+			Py_DECREF(l);
+			if (!outer) {
+				goto nomem;
+			}
+			PyDict_SetItem(ret, key, outer);
+			Py_DECREF(outer);
+			sneedle++;
+		} else if (result > 0) {
+			PyObject *r = hashflags(right);
+			if (!r) {
+				goto nomem;
+			}
+			outer = PyTuple_Pack(2, emptyTup, r);
+			Py_DECREF(r);
+			if (!outer) {
+				goto nomem;
+			}
+			PyDict_SetItem(ret, key, outer);
+			Py_DECREF(outer);
+			oneedle++;
+		} else {
+			/* file exists in both manifests */
+			if (left->len != right->len
+			    || memcmp(left->start, right->start, left->len)
+			    || left->hash_suffix != right->hash_suffix) {
+				PyObject *l = hashflags(left);
+				PyObject *r;
+				if (!l) {
+					goto nomem;
+				}
+				r = hashflags(right);
+				if (!r) {
+					Py_DECREF(l);
+					goto nomem;
+				}
+				outer = PyTuple_Pack(2, l, r);
+				Py_DECREF(l);
+				Py_DECREF(r);
+				if (!outer) {
+					goto nomem;
+				}
+				PyDict_SetItem(ret, key, outer);
+				Py_DECREF(outer);
+			} else if (listclean) {
+				PyDict_SetItem(ret, key, Py_None);
+			}
+			sneedle++;
+			oneedle++;
+		}
+		Py_DECREF(key);
+	}
+	Py_DECREF(emptyTup);
+	return ret;
+nomem:
+	PyErr_NoMemory();
+	Py_XDECREF(ret);
+	Py_XDECREF(emptyTup);
+	return NULL;
+}
+
+static PyMethodDef lazymanifest_methods[] = {
+	{"iterkeys", (PyCFunction)lazymanifest_getkeysiter, METH_NOARGS,
+	 "Iterate over file names in this lazymanifest."},
+	{"iterentries", (PyCFunction)lazymanifest_getentriesiter, METH_NOARGS,
+	 "Iterate over (path, nodeid, flags) tuples in this lazymanifest."},
+	{"copy", (PyCFunction)lazymanifest_copy, METH_NOARGS,
+	 "Make a copy of this lazymanifest."},
+	{"filtercopy", (PyCFunction)lazymanifest_filtercopy, METH_O,
+	 "Make a copy of this manifest filtered by matchfn."},
+	{"diff", (PyCFunction)lazymanifest_diff, METH_VARARGS,
+	 "Compare this lazymanifest to another one."},
+	{"text", (PyCFunction)lazymanifest_text, METH_NOARGS,
+	 "Encode this manifest to text."},
+	{NULL},
+};
+
+#ifdef IS_PY3K
+#define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT
+#else
+#define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_SEQUENCE_IN
+#endif
+
+static PyTypeObject lazymanifestType = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	"parsers.lazymanifest",                           /* tp_name */
+	sizeof(lazymanifest),                             /* tp_basicsize */
+	0,                                                /* tp_itemsize */
+	(destructor)lazymanifest_dealloc,                 /* tp_dealloc */
+	0,                                                /* tp_print */
+	0,                                                /* tp_getattr */
+	0,                                                /* tp_setattr */
+	0,                                                /* tp_compare */
+	0,                                                /* tp_repr */
+	0,                                                /* tp_as_number */
+	&lazymanifest_seq_meths,                          /* tp_as_sequence */
+	&lazymanifest_mapping_methods,                    /* tp_as_mapping */
+	0,                                                /* tp_hash */
+	0,                                                /* tp_call */
+	0,                                                /* tp_str */
+	0,                                                /* tp_getattro */
+	0,                                                /* tp_setattro */
+	0,                                                /* tp_as_buffer */
+	LAZYMANIFEST_TPFLAGS,                             /* tp_flags */
+	"TODO(augie)",                                    /* tp_doc */
+	0,                                                /* tp_traverse */
+	0,                                                /* tp_clear */
+	0,                                                /* tp_richcompare */
+	0,                                             /* tp_weaklistoffset */
+	(getiterfunc)lazymanifest_getkeysiter,                /* tp_iter */
+	0,                                                /* tp_iternext */
+	lazymanifest_methods,                             /* tp_methods */
+	0,                                                /* tp_members */
+	0,                                                /* tp_getset */
+	0,                                                /* tp_base */
+	0,                                                /* tp_dict */
+	0,                                                /* tp_descr_get */
+	0,                                                /* tp_descr_set */
+	0,                                                /* tp_dictoffset */
+	(initproc)lazymanifest_init,                      /* tp_init */
+	0,                                                /* tp_alloc */
+};
+
+void manifest_module_init(PyObject * mod)
+{
+	lazymanifestType.tp_new = PyType_GenericNew;
+	if (PyType_Ready(&lazymanifestType) < 0)
+		return;
+	Py_INCREF(&lazymanifestType);
+
+	PyModule_AddObject(mod, "lazymanifest",
+			   (PyObject *)&lazymanifestType);
+}