mercurial/manifest.c
branchstable
changeset 33572 857876ebaed4
parent 33202 c1994c986d77
parent 33571 9a944e908ecf
child 33573 9e0fea06ae2c
equal deleted inserted replaced
33202:c1994c986d77 33572:857876ebaed4
     1 /*
       
     2  * manifest.c - manifest type that does on-demand parsing.
       
     3  *
       
     4  * Copyright 2015, Google Inc.
       
     5  *
       
     6  * This software may be used and distributed according to the terms of
       
     7  * the GNU General Public License, incorporated herein by reference.
       
     8  */
       
     9 #include <Python.h>
       
    10 
       
    11 #include <assert.h>
       
    12 #include <string.h>
       
    13 #include <stdlib.h>
       
    14 
       
    15 #include "util.h"
       
    16 
       
    17 #define DEFAULT_LINES 100000
       
    18 
       
    19 typedef struct {
       
    20 	char *start;
       
    21 	Py_ssize_t len; /* length of line including terminal newline */
       
    22 	char hash_suffix;
       
    23 	bool from_malloc;
       
    24 	bool deleted;
       
    25 } line;
       
    26 
       
    27 typedef struct {
       
    28 	PyObject_HEAD
       
    29 	PyObject *pydata;
       
    30 	line *lines;
       
    31 	int numlines; /* number of line entries */
       
    32 	int livelines; /* number of non-deleted lines */
       
    33 	int maxlines; /* allocated number of lines */
       
    34 	bool dirty;
       
    35 } lazymanifest;
       
    36 
       
    37 #define MANIFEST_OOM -1
       
    38 #define MANIFEST_NOT_SORTED -2
       
    39 #define MANIFEST_MALFORMED -3
       
    40 
       
    41 /* defined in parsers.c */
       
    42 PyObject *unhexlify(const char *str, int len);
       
    43 
       
    44 /* get the length of the path for a line */
       
    45 static size_t pathlen(line *l) {
       
    46 	return strlen(l->start);
       
    47 }
       
    48 
       
    49 /* get the node value of a single line */
       
    50 static PyObject *nodeof(line *l) {
       
    51 	char *s = l->start;
       
    52 	ssize_t llen = pathlen(l);
       
    53 	PyObject *hash = unhexlify(s + llen + 1, 40);
       
    54 	if (!hash) {
       
    55 		return NULL;
       
    56 	}
       
    57 	if (l->hash_suffix != '\0') {
       
    58 		char newhash[21];
       
    59 		memcpy(newhash, PyBytes_AsString(hash), 20);
       
    60 		Py_DECREF(hash);
       
    61 		newhash[20] = l->hash_suffix;
       
    62 		hash = PyBytes_FromStringAndSize(newhash, 21);
       
    63 	}
       
    64 	return hash;
       
    65 }
       
    66 
       
    67 /* get the node hash and flags of a line as a tuple */
       
    68 static PyObject *hashflags(line *l)
       
    69 {
       
    70 	char *s = l->start;
       
    71 	size_t plen = pathlen(l);
       
    72 	PyObject *hash = nodeof(l);
       
    73 
       
    74 	/* 40 for hash, 1 for null byte, 1 for newline */
       
    75 	size_t hplen = plen + 42;
       
    76 	Py_ssize_t flen = l->len - hplen;
       
    77 	PyObject *flags;
       
    78 	PyObject *tup;
       
    79 
       
    80 	if (!hash)
       
    81 		return NULL;
       
    82 	flags = PyBytes_FromStringAndSize(s + hplen - 1, flen);
       
    83 	if (!flags) {
       
    84 		Py_DECREF(hash);
       
    85 		return NULL;
       
    86 	}
       
    87 	tup = PyTuple_Pack(2, hash, flags);
       
    88 	Py_DECREF(flags);
       
    89 	Py_DECREF(hash);
       
    90 	return tup;
       
    91 }
       
    92 
       
    93 /* if we're about to run out of space in the line index, add more */
       
    94 static bool realloc_if_full(lazymanifest *self)
       
    95 {
       
    96 	if (self->numlines == self->maxlines) {
       
    97 		self->maxlines *= 2;
       
    98 		self->lines = realloc(self->lines, self->maxlines * sizeof(line));
       
    99 	}
       
   100 	return !!self->lines;
       
   101 }
       
   102 
       
   103 /*
       
   104  * Find the line boundaries in the manifest that 'data' points to and store
       
   105  * information about each line in 'self'.
       
   106  */
       
   107 static int find_lines(lazymanifest *self, char *data, Py_ssize_t len)
       
   108 {
       
   109 	char *prev = NULL;
       
   110 	while (len > 0) {
       
   111 		line *l;
       
   112 		char *next = memchr(data, '\n', len);
       
   113 		if (!next) {
       
   114 			return MANIFEST_MALFORMED;
       
   115 		}
       
   116 		next++; /* advance past newline */
       
   117 		if (!realloc_if_full(self)) {
       
   118 			return MANIFEST_OOM; /* no memory */
       
   119 		}
       
   120 		if (prev && strcmp(prev, data) > -1) {
       
   121 			/* This data isn't sorted, so we have to abort. */
       
   122 			return MANIFEST_NOT_SORTED;
       
   123 		}
       
   124 		l = self->lines + ((self->numlines)++);
       
   125 		l->start = data;
       
   126 		l->len = next - data;
       
   127 		l->hash_suffix = '\0';
       
   128 		l->from_malloc = false;
       
   129 		l->deleted = false;
       
   130 		len = len - l->len;
       
   131 		prev = data;
       
   132 		data = next;
       
   133 	}
       
   134 	self->livelines = self->numlines;
       
   135 	return 0;
       
   136 }
       
   137 
       
   138 static int lazymanifest_init(lazymanifest *self, PyObject *args)
       
   139 {
       
   140 	char *data;
       
   141 	Py_ssize_t len;
       
   142 	int err, ret;
       
   143 	PyObject *pydata;
       
   144 	if (!PyArg_ParseTuple(args, "S", &pydata)) {
       
   145 		return -1;
       
   146 	}
       
   147 	err = PyBytes_AsStringAndSize(pydata, &data, &len);
       
   148 
       
   149 	self->dirty = false;
       
   150 	if (err == -1)
       
   151 		return -1;
       
   152 	self->pydata = pydata;
       
   153 	Py_INCREF(self->pydata);
       
   154 	Py_BEGIN_ALLOW_THREADS
       
   155 	self->lines = malloc(DEFAULT_LINES * sizeof(line));
       
   156 	self->maxlines = DEFAULT_LINES;
       
   157 	self->numlines = 0;
       
   158 	if (!self->lines)
       
   159 		ret = MANIFEST_OOM;
       
   160 	else
       
   161 		ret = find_lines(self, data, len);
       
   162 	Py_END_ALLOW_THREADS
       
   163 	switch (ret) {
       
   164 	case 0:
       
   165 		break;
       
   166 	case MANIFEST_OOM:
       
   167 		PyErr_NoMemory();
       
   168 		break;
       
   169 	case MANIFEST_NOT_SORTED:
       
   170 		PyErr_Format(PyExc_ValueError,
       
   171 			     "Manifest lines not in sorted order.");
       
   172 		break;
       
   173 	case MANIFEST_MALFORMED:
       
   174 		PyErr_Format(PyExc_ValueError,
       
   175 			     "Manifest did not end in a newline.");
       
   176 		break;
       
   177 	default:
       
   178 		PyErr_Format(PyExc_ValueError,
       
   179 			     "Unknown problem parsing manifest.");
       
   180 	}
       
   181 	return ret == 0 ? 0 : -1;
       
   182 }
       
   183 
       
   184 static void lazymanifest_dealloc(lazymanifest *self)
       
   185 {
       
   186 	/* free any extra lines we had to allocate */
       
   187 	int i;
       
   188 	for (i = 0; i < self->numlines; i++) {
       
   189 		if (self->lines[i].from_malloc) {
       
   190 			free(self->lines[i].start);
       
   191 		}
       
   192 	}
       
   193 	if (self->lines) {
       
   194 		free(self->lines);
       
   195 		self->lines = NULL;
       
   196 	}
       
   197 	if (self->pydata) {
       
   198 		Py_DECREF(self->pydata);
       
   199 		self->pydata = NULL;
       
   200 	}
       
   201 	PyObject_Del(self);
       
   202 }
       
   203 
       
   204 /* iteration support */
       
   205 
       
   206 typedef struct {
       
   207 	PyObject_HEAD lazymanifest *m;
       
   208 	Py_ssize_t pos;
       
   209 } lmIter;
       
   210 
       
   211 static void lmiter_dealloc(PyObject *o)
       
   212 {
       
   213 	lmIter *self = (lmIter *)o;
       
   214 	Py_DECREF(self->m);
       
   215 	PyObject_Del(self);
       
   216 }
       
   217 
       
   218 static line *lmiter_nextline(lmIter *self)
       
   219 {
       
   220 	do {
       
   221 		self->pos++;
       
   222 		if (self->pos >= self->m->numlines) {
       
   223 			return NULL;
       
   224 		}
       
   225 		/* skip over deleted manifest entries */
       
   226 	} while (self->m->lines[self->pos].deleted);
       
   227 	return self->m->lines + self->pos;
       
   228 }
       
   229 
       
   230 static PyObject *lmiter_iterentriesnext(PyObject *o)
       
   231 {
       
   232 	size_t pl;
       
   233 	line *l;
       
   234 	Py_ssize_t consumed;
       
   235 	PyObject *ret = NULL, *path = NULL, *hash = NULL, *flags = NULL;
       
   236 	l = lmiter_nextline((lmIter *)o);
       
   237 	if (!l) {
       
   238 		goto done;
       
   239 	}
       
   240 	pl = pathlen(l);
       
   241 	path = PyBytes_FromStringAndSize(l->start, pl);
       
   242 	hash = nodeof(l);
       
   243 	consumed = pl + 41;
       
   244 	flags = PyBytes_FromStringAndSize(l->start + consumed,
       
   245 					   l->len - consumed - 1);
       
   246 	if (!path || !hash || !flags) {
       
   247 		goto done;
       
   248 	}
       
   249 	ret = PyTuple_Pack(3, path, hash, flags);
       
   250 done:
       
   251 	Py_XDECREF(path);
       
   252 	Py_XDECREF(hash);
       
   253 	Py_XDECREF(flags);
       
   254 	return ret;
       
   255 }
       
   256 
       
   257 #ifdef IS_PY3K
       
   258 #define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT
       
   259 #else
       
   260 #define LAZYMANIFESTENTRIESITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \
       
   261 	| Py_TPFLAGS_HAVE_ITER
       
   262 #endif
       
   263 
       
   264 static PyTypeObject lazymanifestEntriesIterator = {
       
   265 	PyVarObject_HEAD_INIT(NULL, 0)
       
   266 	"parsers.lazymanifest.entriesiterator", /*tp_name */
       
   267 	sizeof(lmIter),                  /*tp_basicsize */
       
   268 	0,                               /*tp_itemsize */
       
   269 	lmiter_dealloc,                  /*tp_dealloc */
       
   270 	0,                               /*tp_print */
       
   271 	0,                               /*tp_getattr */
       
   272 	0,                               /*tp_setattr */
       
   273 	0,                               /*tp_compare */
       
   274 	0,                               /*tp_repr */
       
   275 	0,                               /*tp_as_number */
       
   276 	0,                               /*tp_as_sequence */
       
   277 	0,                               /*tp_as_mapping */
       
   278 	0,                               /*tp_hash */
       
   279 	0,                               /*tp_call */
       
   280 	0,                               /*tp_str */
       
   281 	0,                               /*tp_getattro */
       
   282 	0,                               /*tp_setattro */
       
   283 	0,                               /*tp_as_buffer */
       
   284 	LAZYMANIFESTENTRIESITERATOR_TPFLAGS, /* tp_flags */
       
   285 	"Iterator for 3-tuples in a lazymanifest.",  /* tp_doc */
       
   286 	0,                               /* tp_traverse */
       
   287 	0,                               /* tp_clear */
       
   288 	0,                               /* tp_richcompare */
       
   289 	0,                               /* tp_weaklistoffset */
       
   290 	PyObject_SelfIter,               /* tp_iter: __iter__() method */
       
   291 	lmiter_iterentriesnext,          /* tp_iternext: next() method */
       
   292 };
       
   293 
       
   294 static PyObject *lmiter_iterkeysnext(PyObject *o)
       
   295 {
       
   296 	size_t pl;
       
   297 	line *l = lmiter_nextline((lmIter *)o);
       
   298 	if (!l) {
       
   299 		return NULL;
       
   300 	}
       
   301 	pl = pathlen(l);
       
   302 	return PyBytes_FromStringAndSize(l->start, pl);
       
   303 }
       
   304 
       
   305 #ifdef IS_PY3K
       
   306 #define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT
       
   307 #else
       
   308 #define LAZYMANIFESTKEYSITERATOR_TPFLAGS Py_TPFLAGS_DEFAULT \
       
   309 	| Py_TPFLAGS_HAVE_ITER
       
   310 #endif
       
   311 
       
   312 static PyTypeObject lazymanifestKeysIterator = {
       
   313 	PyVarObject_HEAD_INIT(NULL, 0)
       
   314 	"parsers.lazymanifest.keysiterator", /*tp_name */
       
   315 	sizeof(lmIter),                  /*tp_basicsize */
       
   316 	0,                               /*tp_itemsize */
       
   317 	lmiter_dealloc,                  /*tp_dealloc */
       
   318 	0,                               /*tp_print */
       
   319 	0,                               /*tp_getattr */
       
   320 	0,                               /*tp_setattr */
       
   321 	0,                               /*tp_compare */
       
   322 	0,                               /*tp_repr */
       
   323 	0,                               /*tp_as_number */
       
   324 	0,                               /*tp_as_sequence */
       
   325 	0,                               /*tp_as_mapping */
       
   326 	0,                               /*tp_hash */
       
   327 	0,                               /*tp_call */
       
   328 	0,                               /*tp_str */
       
   329 	0,                               /*tp_getattro */
       
   330 	0,                               /*tp_setattro */
       
   331 	0,                               /*tp_as_buffer */
       
   332 	LAZYMANIFESTKEYSITERATOR_TPFLAGS, /* tp_flags */
       
   333 	"Keys iterator for a lazymanifest.",  /* tp_doc */
       
   334 	0,                               /* tp_traverse */
       
   335 	0,                               /* tp_clear */
       
   336 	0,                               /* tp_richcompare */
       
   337 	0,                               /* tp_weaklistoffset */
       
   338 	PyObject_SelfIter,               /* tp_iter: __iter__() method */
       
   339 	lmiter_iterkeysnext,             /* tp_iternext: next() method */
       
   340 };
       
   341 
       
   342 static lazymanifest *lazymanifest_copy(lazymanifest *self);
       
   343 
       
   344 static PyObject *lazymanifest_getentriesiter(lazymanifest *self)
       
   345 {
       
   346 	lmIter *i = NULL;
       
   347 	lazymanifest *t = lazymanifest_copy(self);
       
   348 	if (!t) {
       
   349 		PyErr_NoMemory();
       
   350 		return NULL;
       
   351 	}
       
   352 	i = PyObject_New(lmIter, &lazymanifestEntriesIterator);
       
   353 	if (i) {
       
   354 		i->m = t;
       
   355 		i->pos = -1;
       
   356 	} else {
       
   357 		Py_DECREF(t);
       
   358 		PyErr_NoMemory();
       
   359 	}
       
   360 	return (PyObject *)i;
       
   361 }
       
   362 
       
   363 static PyObject *lazymanifest_getkeysiter(lazymanifest *self)
       
   364 {
       
   365 	lmIter *i = NULL;
       
   366 	lazymanifest *t = lazymanifest_copy(self);
       
   367 	if (!t) {
       
   368 		PyErr_NoMemory();
       
   369 		return NULL;
       
   370 	}
       
   371 	i = PyObject_New(lmIter, &lazymanifestKeysIterator);
       
   372 	if (i) {
       
   373 		i->m = t;
       
   374 		i->pos = -1;
       
   375 	} else {
       
   376 		Py_DECREF(t);
       
   377 		PyErr_NoMemory();
       
   378 	}
       
   379 	return (PyObject *)i;
       
   380 }
       
   381 
       
   382 /* __getitem__ and __setitem__ support */
       
   383 
       
   384 static Py_ssize_t lazymanifest_size(lazymanifest *self)
       
   385 {
       
   386 	return self->livelines;
       
   387 }
       
   388 
       
   389 static int linecmp(const void *left, const void *right)
       
   390 {
       
   391 	return strcmp(((const line *)left)->start,
       
   392 		      ((const line *)right)->start);
       
   393 }
       
   394 
       
   395 static PyObject *lazymanifest_getitem(lazymanifest *self, PyObject *key)
       
   396 {
       
   397 	line needle;
       
   398 	line *hit;
       
   399 	if (!PyBytes_Check(key)) {
       
   400 		PyErr_Format(PyExc_TypeError,
       
   401 			     "getitem: manifest keys must be a string.");
       
   402 		return NULL;
       
   403 	}
       
   404 	needle.start = PyBytes_AsString(key);
       
   405 	hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
       
   406 		      &linecmp);
       
   407 	if (!hit || hit->deleted) {
       
   408 		PyErr_Format(PyExc_KeyError, "No such manifest entry.");
       
   409 		return NULL;
       
   410 	}
       
   411 	return hashflags(hit);
       
   412 }
       
   413 
       
   414 static int lazymanifest_delitem(lazymanifest *self, PyObject *key)
       
   415 {
       
   416 	line needle;
       
   417 	line *hit;
       
   418 	if (!PyBytes_Check(key)) {
       
   419 		PyErr_Format(PyExc_TypeError,
       
   420 			     "delitem: manifest keys must be a string.");
       
   421 		return -1;
       
   422 	}
       
   423 	needle.start = PyBytes_AsString(key);
       
   424 	hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
       
   425 		      &linecmp);
       
   426 	if (!hit || hit->deleted) {
       
   427 		PyErr_Format(PyExc_KeyError,
       
   428 			     "Tried to delete nonexistent manifest entry.");
       
   429 		return -1;
       
   430 	}
       
   431 	self->dirty = true;
       
   432 	hit->deleted = true;
       
   433 	self->livelines--;
       
   434 	return 0;
       
   435 }
       
   436 
       
   437 /* Do a binary search for the insertion point for new, creating the
       
   438  * new entry if needed. */
       
   439 static int internalsetitem(lazymanifest *self, line *new) {
       
   440 	int start = 0, end = self->numlines;
       
   441 	while (start < end) {
       
   442 		int pos = start + (end - start) / 2;
       
   443 		int c = linecmp(new, self->lines + pos);
       
   444 		if (c < 0)
       
   445 			end = pos;
       
   446 		else if (c > 0)
       
   447 			start = pos + 1;
       
   448 		else {
       
   449 			if (self->lines[pos].deleted)
       
   450 				self->livelines++;
       
   451 			if (self->lines[pos].from_malloc)
       
   452 				free(self->lines[pos].start);
       
   453 			start = pos;
       
   454 			goto finish;
       
   455 		}
       
   456 	}
       
   457 	/* being here means we need to do an insert */
       
   458 	if (!realloc_if_full(self)) {
       
   459 		PyErr_NoMemory();
       
   460 		return -1;
       
   461 	}
       
   462 	memmove(self->lines + start + 1, self->lines + start,
       
   463 		(self->numlines - start) * sizeof(line));
       
   464 	self->numlines++;
       
   465 	self->livelines++;
       
   466 finish:
       
   467 	self->lines[start] = *new;
       
   468 	self->dirty = true;
       
   469 	return 0;
       
   470 }
       
   471 
       
   472 static int lazymanifest_setitem(
       
   473 	lazymanifest *self, PyObject *key, PyObject *value)
       
   474 {
       
   475 	char *path;
       
   476 	Py_ssize_t plen;
       
   477 	PyObject *pyhash;
       
   478 	Py_ssize_t hlen;
       
   479 	char *hash;
       
   480 	PyObject *pyflags;
       
   481 	char *flags;
       
   482 	Py_ssize_t flen;
       
   483 	size_t dlen;
       
   484 	char *dest;
       
   485 	int i;
       
   486 	line new;
       
   487 	if (!PyBytes_Check(key)) {
       
   488 		PyErr_Format(PyExc_TypeError,
       
   489 			     "setitem: manifest keys must be a string.");
       
   490 		return -1;
       
   491 	}
       
   492 	if (!value) {
       
   493 		return lazymanifest_delitem(self, key);
       
   494 	}
       
   495 	if (!PyTuple_Check(value) || PyTuple_Size(value) != 2) {
       
   496 		PyErr_Format(PyExc_TypeError,
       
   497 			     "Manifest values must be a tuple of (node, flags).");
       
   498 		return -1;
       
   499 	}
       
   500 	if (PyBytes_AsStringAndSize(key, &path, &plen) == -1) {
       
   501 		return -1;
       
   502 	}
       
   503 
       
   504 	pyhash = PyTuple_GetItem(value, 0);
       
   505 	if (!PyBytes_Check(pyhash)) {
       
   506 		PyErr_Format(PyExc_TypeError,
       
   507 			     "node must be a 20-byte string");
       
   508 		return -1;
       
   509 	}
       
   510 	hlen = PyBytes_Size(pyhash);
       
   511 	/* Some parts of the codebase try and set 21 or 22
       
   512 	 * byte "hash" values in order to perturb things for
       
   513 	 * status. We have to preserve at least the 21st
       
   514 	 * byte. Sigh. If there's a 22nd byte, we drop it on
       
   515 	 * the floor, which works fine.
       
   516 	 */
       
   517 	if (hlen != 20 && hlen != 21 && hlen != 22) {
       
   518 		PyErr_Format(PyExc_TypeError,
       
   519 			     "node must be a 20-byte string");
       
   520 		return -1;
       
   521 	}
       
   522 	hash = PyBytes_AsString(pyhash);
       
   523 
       
   524 	pyflags = PyTuple_GetItem(value, 1);
       
   525 	if (!PyBytes_Check(pyflags) || PyBytes_Size(pyflags) > 1) {
       
   526 		PyErr_Format(PyExc_TypeError,
       
   527 			     "flags must a 0 or 1 byte string");
       
   528 		return -1;
       
   529 	}
       
   530 	if (PyBytes_AsStringAndSize(pyflags, &flags, &flen) == -1) {
       
   531 		return -1;
       
   532 	}
       
   533 	/* one null byte and one newline */
       
   534 	dlen = plen + 41 + flen + 1;
       
   535 	dest = malloc(dlen);
       
   536 	if (!dest) {
       
   537 		PyErr_NoMemory();
       
   538 		return -1;
       
   539 	}
       
   540 	memcpy(dest, path, plen + 1);
       
   541 	for (i = 0; i < 20; i++) {
       
   542 		/* Cast to unsigned, so it will not get sign-extended when promoted
       
   543 		 * to int (as is done when passing to a variadic function)
       
   544 		 */
       
   545 		sprintf(dest + plen + 1 + (i * 2), "%02x", (unsigned char)hash[i]);
       
   546 	}
       
   547 	memcpy(dest + plen + 41, flags, flen);
       
   548 	dest[plen + 41 + flen] = '\n';
       
   549 	new.start = dest;
       
   550 	new.len = dlen;
       
   551 	new.hash_suffix = '\0';
       
   552 	if (hlen > 20) {
       
   553 		new.hash_suffix = hash[20];
       
   554 	}
       
   555 	new.from_malloc = true;     /* is `start` a pointer we allocated? */
       
   556 	new.deleted = false;        /* is this entry deleted? */
       
   557 	if (internalsetitem(self, &new)) {
       
   558 		return -1;
       
   559 	}
       
   560 	return 0;
       
   561 }
       
   562 
       
   563 static PyMappingMethods lazymanifest_mapping_methods = {
       
   564 	(lenfunc)lazymanifest_size,             /* mp_length */
       
   565 	(binaryfunc)lazymanifest_getitem,       /* mp_subscript */
       
   566 	(objobjargproc)lazymanifest_setitem,    /* mp_ass_subscript */
       
   567 };
       
   568 
       
   569 /* sequence methods (important or __contains__ builds an iterator) */
       
   570 
       
   571 static int lazymanifest_contains(lazymanifest *self, PyObject *key)
       
   572 {
       
   573 	line needle;
       
   574 	line *hit;
       
   575 	if (!PyBytes_Check(key)) {
       
   576 		/* Our keys are always strings, so if the contains
       
   577 		 * check is for a non-string, just return false. */
       
   578 		return 0;
       
   579 	}
       
   580 	needle.start = PyBytes_AsString(key);
       
   581 	hit = bsearch(&needle, self->lines, self->numlines, sizeof(line),
       
   582 		      &linecmp);
       
   583 	if (!hit || hit->deleted) {
       
   584 		return 0;
       
   585 	}
       
   586 	return 1;
       
   587 }
       
   588 
       
   589 static PySequenceMethods lazymanifest_seq_meths = {
       
   590 	(lenfunc)lazymanifest_size, /* sq_length */
       
   591 	0, /* sq_concat */
       
   592 	0, /* sq_repeat */
       
   593 	0, /* sq_item */
       
   594 	0, /* sq_slice */
       
   595 	0, /* sq_ass_item */
       
   596 	0, /* sq_ass_slice */
       
   597 	(objobjproc)lazymanifest_contains, /* sq_contains */
       
   598 	0, /* sq_inplace_concat */
       
   599 	0, /* sq_inplace_repeat */
       
   600 };
       
   601 
       
   602 
       
   603 /* Other methods (copy, diff, etc) */
       
   604 static PyTypeObject lazymanifestType;
       
   605 
       
   606 /* If the manifest has changes, build the new manifest text and reindex it. */
       
   607 static int compact(lazymanifest *self) {
       
   608 	int i;
       
   609 	ssize_t need = 0;
       
   610 	char *data;
       
   611 	line *src, *dst;
       
   612 	PyObject *pydata;
       
   613 	if (!self->dirty)
       
   614 		return 0;
       
   615 	for (i = 0; i < self->numlines; i++) {
       
   616 		if (!self->lines[i].deleted) {
       
   617 			need += self->lines[i].len;
       
   618 		}
       
   619 	}
       
   620 	pydata = PyBytes_FromStringAndSize(NULL, need);
       
   621 	if (!pydata)
       
   622 		return -1;
       
   623 	data = PyBytes_AsString(pydata);
       
   624 	if (!data) {
       
   625 		return -1;
       
   626 	}
       
   627 	src = self->lines;
       
   628 	dst = self->lines;
       
   629 	for (i = 0; i < self->numlines; i++, src++) {
       
   630 		char *tofree = NULL;
       
   631 		if (src->from_malloc) {
       
   632 			tofree = src->start;
       
   633 		}
       
   634 		if (!src->deleted) {
       
   635 			memcpy(data, src->start, src->len);
       
   636 			*dst = *src;
       
   637 			dst->start = data;
       
   638 			dst->from_malloc = false;
       
   639 			data += dst->len;
       
   640 			dst++;
       
   641 		}
       
   642 		free(tofree);
       
   643 	}
       
   644 	Py_DECREF(self->pydata);
       
   645 	self->pydata = pydata;
       
   646 	self->numlines = self->livelines;
       
   647 	self->dirty = false;
       
   648 	return 0;
       
   649 }
       
   650 
       
   651 static PyObject *lazymanifest_text(lazymanifest *self)
       
   652 {
       
   653 	if (compact(self) != 0) {
       
   654 		PyErr_NoMemory();
       
   655 		return NULL;
       
   656 	}
       
   657 	Py_INCREF(self->pydata);
       
   658 	return self->pydata;
       
   659 }
       
   660 
       
   661 static lazymanifest *lazymanifest_copy(lazymanifest *self)
       
   662 {
       
   663 	lazymanifest *copy = NULL;
       
   664 	if (compact(self) != 0) {
       
   665 		goto nomem;
       
   666 	}
       
   667 	copy = PyObject_New(lazymanifest, &lazymanifestType);
       
   668 	if (!copy) {
       
   669 		goto nomem;
       
   670 	}
       
   671 	copy->numlines = self->numlines;
       
   672 	copy->livelines = self->livelines;
       
   673 	copy->dirty = false;
       
   674 	copy->lines = malloc(self->maxlines *sizeof(line));
       
   675 	if (!copy->lines) {
       
   676 		goto nomem;
       
   677 	}
       
   678 	memcpy(copy->lines, self->lines, self->numlines * sizeof(line));
       
   679 	copy->maxlines = self->maxlines;
       
   680 	copy->pydata = self->pydata;
       
   681 	Py_INCREF(copy->pydata);
       
   682 	return copy;
       
   683 nomem:
       
   684 	PyErr_NoMemory();
       
   685 	Py_XDECREF(copy);
       
   686 	return NULL;
       
   687 }
       
   688 
       
   689 static lazymanifest *lazymanifest_filtercopy(
       
   690 	lazymanifest *self, PyObject *matchfn)
       
   691 {
       
   692 	lazymanifest *copy = NULL;
       
   693 	int i;
       
   694 	if (!PyCallable_Check(matchfn)) {
       
   695 		PyErr_SetString(PyExc_TypeError, "matchfn must be callable");
       
   696 		return NULL;
       
   697 	}
       
   698 	/* compact ourselves first to avoid double-frees later when we
       
   699 	 * compact tmp so that it doesn't have random pointers to our
       
   700 	 * underlying from_malloc-data (self->pydata is safe) */
       
   701 	if (compact(self) != 0) {
       
   702 		goto nomem;
       
   703 	}
       
   704 	copy = PyObject_New(lazymanifest, &lazymanifestType);
       
   705 	if (!copy) {
       
   706 		goto nomem;
       
   707 	}
       
   708 	copy->dirty = true;
       
   709 	copy->lines = malloc(self->maxlines * sizeof(line));
       
   710 	if (!copy->lines) {
       
   711 		goto nomem;
       
   712 	}
       
   713 	copy->maxlines = self->maxlines;
       
   714 	copy->numlines = 0;
       
   715 	copy->pydata = self->pydata;
       
   716 	Py_INCREF(self->pydata);
       
   717 	for (i = 0; i < self->numlines; i++) {
       
   718 		PyObject *arglist = NULL, *result = NULL;
       
   719 		arglist = Py_BuildValue("(s)", self->lines[i].start);
       
   720 		if (!arglist) {
       
   721 			return NULL;
       
   722 		}
       
   723 		result = PyObject_CallObject(matchfn, arglist);
       
   724 		Py_DECREF(arglist);
       
   725 		/* if the callback raised an exception, just let it
       
   726 		 * through and give up */
       
   727 		if (!result) {
       
   728 			free(copy->lines);
       
   729 			Py_DECREF(self->pydata);
       
   730 			return NULL;
       
   731 		}
       
   732 		if (PyObject_IsTrue(result)) {
       
   733 			assert(!(self->lines[i].from_malloc));
       
   734 			copy->lines[copy->numlines++] = self->lines[i];
       
   735 		}
       
   736 		Py_DECREF(result);
       
   737 	}
       
   738 	copy->livelines = copy->numlines;
       
   739 	return copy;
       
   740 nomem:
       
   741 	PyErr_NoMemory();
       
   742 	Py_XDECREF(copy);
       
   743 	return NULL;
       
   744 }
       
   745 
       
   746 static PyObject *lazymanifest_diff(lazymanifest *self, PyObject *args)
       
   747 {
       
   748 	lazymanifest *other;
       
   749 	PyObject *pyclean = NULL;
       
   750 	bool listclean;
       
   751 	PyObject *emptyTup = NULL, *ret = NULL;
       
   752 	PyObject *es;
       
   753 	int sneedle = 0, oneedle = 0;
       
   754 	if (!PyArg_ParseTuple(args, "O!|O", &lazymanifestType, &other, &pyclean)) {
       
   755 		return NULL;
       
   756 	}
       
   757 	listclean = (!pyclean) ? false : PyObject_IsTrue(pyclean);
       
   758 	es = PyBytes_FromString("");
       
   759 	if (!es) {
       
   760 		goto nomem;
       
   761 	}
       
   762 	emptyTup = PyTuple_Pack(2, Py_None, es);
       
   763 	Py_DECREF(es);
       
   764 	if (!emptyTup) {
       
   765 		goto nomem;
       
   766 	}
       
   767 	ret = PyDict_New();
       
   768 	if (!ret) {
       
   769 		goto nomem;
       
   770 	}
       
   771 	while (sneedle != self->numlines || oneedle != other->numlines) {
       
   772 		line *left = self->lines + sneedle;
       
   773 		line *right = other->lines + oneedle;
       
   774 		int result;
       
   775 		PyObject *key;
       
   776 		PyObject *outer;
       
   777 		/* If we're looking at a deleted entry and it's not
       
   778 		 * the end of the manifest, just skip it. */
       
   779 		if (left->deleted && sneedle < self->numlines) {
       
   780 			sneedle++;
       
   781 			continue;
       
   782 		}
       
   783 		if (right->deleted && oneedle < other->numlines) {
       
   784 			oneedle++;
       
   785 			continue;
       
   786 		}
       
   787 		/* if we're at the end of either manifest, then we
       
   788 		 * know the remaining items are adds so we can skip
       
   789 		 * the strcmp. */
       
   790 		if (sneedle == self->numlines) {
       
   791 			result = 1;
       
   792 		} else if (oneedle == other->numlines) {
       
   793 			result = -1;
       
   794 		} else {
       
   795 			result = linecmp(left, right);
       
   796 		}
       
   797 		key = result <= 0 ?
       
   798 			PyBytes_FromString(left->start) :
       
   799 			PyBytes_FromString(right->start);
       
   800 		if (!key)
       
   801 			goto nomem;
       
   802 		if (result < 0) {
       
   803 			PyObject *l = hashflags(left);
       
   804 			if (!l) {
       
   805 				goto nomem;
       
   806 			}
       
   807 			outer = PyTuple_Pack(2, l, emptyTup);
       
   808 			Py_DECREF(l);
       
   809 			if (!outer) {
       
   810 				goto nomem;
       
   811 			}
       
   812 			PyDict_SetItem(ret, key, outer);
       
   813 			Py_DECREF(outer);
       
   814 			sneedle++;
       
   815 		} else if (result > 0) {
       
   816 			PyObject *r = hashflags(right);
       
   817 			if (!r) {
       
   818 				goto nomem;
       
   819 			}
       
   820 			outer = PyTuple_Pack(2, emptyTup, r);
       
   821 			Py_DECREF(r);
       
   822 			if (!outer) {
       
   823 				goto nomem;
       
   824 			}
       
   825 			PyDict_SetItem(ret, key, outer);
       
   826 			Py_DECREF(outer);
       
   827 			oneedle++;
       
   828 		} else {
       
   829 			/* file exists in both manifests */
       
   830 			if (left->len != right->len
       
   831 			    || memcmp(left->start, right->start, left->len)
       
   832 			    || left->hash_suffix != right->hash_suffix) {
       
   833 				PyObject *l = hashflags(left);
       
   834 				PyObject *r;
       
   835 				if (!l) {
       
   836 					goto nomem;
       
   837 				}
       
   838 				r = hashflags(right);
       
   839 				if (!r) {
       
   840 					Py_DECREF(l);
       
   841 					goto nomem;
       
   842 				}
       
   843 				outer = PyTuple_Pack(2, l, r);
       
   844 				Py_DECREF(l);
       
   845 				Py_DECREF(r);
       
   846 				if (!outer) {
       
   847 					goto nomem;
       
   848 				}
       
   849 				PyDict_SetItem(ret, key, outer);
       
   850 				Py_DECREF(outer);
       
   851 			} else if (listclean) {
       
   852 				PyDict_SetItem(ret, key, Py_None);
       
   853 			}
       
   854 			sneedle++;
       
   855 			oneedle++;
       
   856 		}
       
   857 		Py_DECREF(key);
       
   858 	}
       
   859 	Py_DECREF(emptyTup);
       
   860 	return ret;
       
   861 nomem:
       
   862 	PyErr_NoMemory();
       
   863 	Py_XDECREF(ret);
       
   864 	Py_XDECREF(emptyTup);
       
   865 	return NULL;
       
   866 }
       
   867 
       
   868 static PyMethodDef lazymanifest_methods[] = {
       
   869 	{"iterkeys", (PyCFunction)lazymanifest_getkeysiter, METH_NOARGS,
       
   870 	 "Iterate over file names in this lazymanifest."},
       
   871 	{"iterentries", (PyCFunction)lazymanifest_getentriesiter, METH_NOARGS,
       
   872 	 "Iterate over (path, nodeid, flags) tuples in this lazymanifest."},
       
   873 	{"copy", (PyCFunction)lazymanifest_copy, METH_NOARGS,
       
   874 	 "Make a copy of this lazymanifest."},
       
   875 	{"filtercopy", (PyCFunction)lazymanifest_filtercopy, METH_O,
       
   876 	 "Make a copy of this manifest filtered by matchfn."},
       
   877 	{"diff", (PyCFunction)lazymanifest_diff, METH_VARARGS,
       
   878 	 "Compare this lazymanifest to another one."},
       
   879 	{"text", (PyCFunction)lazymanifest_text, METH_NOARGS,
       
   880 	 "Encode this manifest to text."},
       
   881 	{NULL},
       
   882 };
       
   883 
       
   884 #ifdef IS_PY3K
       
   885 #define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT
       
   886 #else
       
   887 #define LAZYMANIFEST_TPFLAGS Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_SEQUENCE_IN
       
   888 #endif
       
   889 
       
   890 static PyTypeObject lazymanifestType = {
       
   891 	PyVarObject_HEAD_INIT(NULL, 0)
       
   892 	"parsers.lazymanifest",                           /* tp_name */
       
   893 	sizeof(lazymanifest),                             /* tp_basicsize */
       
   894 	0,                                                /* tp_itemsize */
       
   895 	(destructor)lazymanifest_dealloc,                 /* tp_dealloc */
       
   896 	0,                                                /* tp_print */
       
   897 	0,                                                /* tp_getattr */
       
   898 	0,                                                /* tp_setattr */
       
   899 	0,                                                /* tp_compare */
       
   900 	0,                                                /* tp_repr */
       
   901 	0,                                                /* tp_as_number */
       
   902 	&lazymanifest_seq_meths,                          /* tp_as_sequence */
       
   903 	&lazymanifest_mapping_methods,                    /* tp_as_mapping */
       
   904 	0,                                                /* tp_hash */
       
   905 	0,                                                /* tp_call */
       
   906 	0,                                                /* tp_str */
       
   907 	0,                                                /* tp_getattro */
       
   908 	0,                                                /* tp_setattro */
       
   909 	0,                                                /* tp_as_buffer */
       
   910 	LAZYMANIFEST_TPFLAGS,                             /* tp_flags */
       
   911 	"TODO(augie)",                                    /* tp_doc */
       
   912 	0,                                                /* tp_traverse */
       
   913 	0,                                                /* tp_clear */
       
   914 	0,                                                /* tp_richcompare */
       
   915 	0,                                             /* tp_weaklistoffset */
       
   916 	(getiterfunc)lazymanifest_getkeysiter,                /* tp_iter */
       
   917 	0,                                                /* tp_iternext */
       
   918 	lazymanifest_methods,                             /* tp_methods */
       
   919 	0,                                                /* tp_members */
       
   920 	0,                                                /* tp_getset */
       
   921 	0,                                                /* tp_base */
       
   922 	0,                                                /* tp_dict */
       
   923 	0,                                                /* tp_descr_get */
       
   924 	0,                                                /* tp_descr_set */
       
   925 	0,                                                /* tp_dictoffset */
       
   926 	(initproc)lazymanifest_init,                      /* tp_init */
       
   927 	0,                                                /* tp_alloc */
       
   928 };
       
   929 
       
   930 void manifest_module_init(PyObject * mod)
       
   931 {
       
   932 	lazymanifestType.tp_new = PyType_GenericNew;
       
   933 	if (PyType_Ready(&lazymanifestType) < 0)
       
   934 		return;
       
   935 	Py_INCREF(&lazymanifestType);
       
   936 
       
   937 	PyModule_AddObject(mod, "lazymanifest",
       
   938 			   (PyObject *)&lazymanifestType);
       
   939 }