mercurial/parsers.c
changeset 16414 e8d37b78acfb
parent 16393 ee163a9cf37c
child 16437 d126a0d16856
equal deleted inserted replaced
16413:1a420761fcb7 16414:e8d37b78acfb
   213 	Py_XDECREF(parents);
   213 	Py_XDECREF(parents);
   214 	return ret;
   214 	return ret;
   215 }
   215 }
   216 
   216 
   217 /*
   217 /*
   218  * A list-like object that decodes the contents of a RevlogNG index
   218  * A base-16 trie for fast node->rev mapping.
   219  * file on demand. It has limited support for insert and delete at the
   219  *
   220  * last element before the end.  The last entry is always a sentinel
   220  * Positive value is index of the next node in the trie
   221  * nullid.
   221  * Negative value is a leaf: -(rev + 1)
       
   222  * Zero is empty
       
   223  */
       
   224 typedef struct {
       
   225 	int children[16];
       
   226 } nodetree;
       
   227 
       
   228 /*
       
   229  * This class has two behaviours.
       
   230  *
       
   231  * When used in a list-like way (with integer keys), we decode an
       
   232  * entry in a RevlogNG index file on demand. Our last entry is a
       
   233  * sentinel, always a nullid.  We have limited support for
       
   234  * integer-keyed insert and delete, only at elements right before the
       
   235  * sentinel.
       
   236  *
       
   237  * With string keys, we lazily perform a reverse mapping from node to
       
   238  * rev, using a base-16 trie.
   222  */
   239  */
   223 typedef struct {
   240 typedef struct {
   224 	PyObject_HEAD
   241 	PyObject_HEAD
   225 	/* Type-specific fields go here. */
   242 	/* Type-specific fields go here. */
   226 	PyObject *data;        /* raw bytes of index */
   243 	PyObject *data;        /* raw bytes of index */
   227 	PyObject **cache;      /* cached tuples */
   244 	PyObject **cache;      /* cached tuples */
   228 	const char **offsets;  /* populated on demand */
   245 	const char **offsets;  /* populated on demand */
   229 	Py_ssize_t raw_length; /* original number of elements */
   246 	Py_ssize_t raw_length; /* original number of elements */
   230 	Py_ssize_t length;     /* current number of elements */
   247 	Py_ssize_t length;     /* current number of elements */
   231 	PyObject *added;       /* populated on demand */
   248 	PyObject *added;       /* populated on demand */
       
   249 	nodetree *nt;          /* base-16 trie */
       
   250 	int ntlength;          /* # nodes in use */
       
   251 	int ntcapacity;        /* # nodes allocated */
       
   252 	int ntdepth;           /* maximum depth of tree */
       
   253 	int ntsplits;          /* # splits performed */
       
   254 	int ntrev;             /* last rev scanned */
       
   255 	int ntlookups;         /* # lookups */
       
   256 	int ntmisses;          /* # lookups that miss the cache */
   232 	int inlined;
   257 	int inlined;
   233 } indexObject;
   258 } indexObject;
   234 
   259 
   235 static Py_ssize_t index_length(indexObject *self)
   260 static Py_ssize_t index_length(const indexObject *self)
   236 {
   261 {
   237 	if (self->added == NULL)
   262 	if (self->added == NULL)
   238 		return self->length;
   263 		return self->length;
   239 	return self->length + PyList_GET_SIZE(self->added);
   264 	return self->length + PyList_GET_SIZE(self->added);
   240 }
   265 }
   241 
   266 
   242 static PyObject *nullentry;
   267 static PyObject *nullentry;
       
   268 static const char nullid[20];
   243 
   269 
   244 static long inline_scan(indexObject *self, const char **offsets);
   270 static long inline_scan(indexObject *self, const char **offsets);
   245 
   271 
   246 #if LONG_MAX == 0x7fffffffL
   272 #if LONG_MAX == 0x7fffffffL
   247 static char *tuple_format = "Kiiiiiis#";
   273 static char *tuple_format = "Kiiiiiis#";
   248 #else
   274 #else
   249 static char *tuple_format = "kiiiiiis#";
   275 static char *tuple_format = "kiiiiiis#";
   250 #endif
   276 #endif
   251 
   277 
   252 /* RevlogNG format (all in big endian, data may be inlined):
   278 /*
       
   279  * Return a pointer to the beginning of a RevlogNG record.
       
   280  */
       
   281 static const char *index_deref(indexObject *self, Py_ssize_t pos)
       
   282 {
       
   283 	if (self->inlined && pos > 0) {
       
   284 		if (self->offsets == NULL) {
       
   285 			self->offsets = malloc(self->raw_length *
       
   286 					       sizeof(*self->offsets));
       
   287 			if (self->offsets == NULL)
       
   288 				return (const char *)PyErr_NoMemory();
       
   289 			inline_scan(self, self->offsets);
       
   290 		}
       
   291 		return self->offsets[pos];
       
   292 	}
       
   293 
       
   294 	return PyString_AS_STRING(self->data) + pos * 64;
       
   295 }
       
   296 
       
   297 /*
       
   298  * RevlogNG format (all in big endian, data may be inlined):
   253  *    6 bytes: offset
   299  *    6 bytes: offset
   254  *    2 bytes: flags
   300  *    2 bytes: flags
   255  *    4 bytes: compressed length
   301  *    4 bytes: compressed length
   256  *    4 bytes: uncompressed length
   302  *    4 bytes: uncompressed length
   257  *    4 bytes: base revision
   303  *    4 bytes: base revision
   268 	const char *c_node_id;
   314 	const char *c_node_id;
   269 	const char *data;
   315 	const char *data;
   270 	Py_ssize_t length = index_length(self);
   316 	Py_ssize_t length = index_length(self);
   271 	PyObject *entry;
   317 	PyObject *entry;
   272 
   318 
   273 	if (pos >= length) {
   319 	if (pos < 0)
       
   320 		pos += length;
       
   321 
       
   322 	if (pos < 0 || pos >= length) {
   274 		PyErr_SetString(PyExc_IndexError, "revlog index out of range");
   323 		PyErr_SetString(PyExc_IndexError, "revlog index out of range");
   275 		return NULL;
   324 		return NULL;
   276 	}
   325 	}
   277 
   326 
   278 	if (pos == length - 1) {
   327 	if (pos == length - 1) {
   296 		self->cache = calloc(self->raw_length, sizeof(PyObject *));
   345 		self->cache = calloc(self->raw_length, sizeof(PyObject *));
   297 		if (self->cache == NULL)
   346 		if (self->cache == NULL)
   298 			return PyErr_NoMemory();
   347 			return PyErr_NoMemory();
   299 	}
   348 	}
   300 
   349 
   301 	if (self->inlined && pos > 0) {
   350 	data = index_deref(self, pos);
   302 		if (self->offsets == NULL) {
   351 	if (data == NULL)
   303 			self->offsets = malloc(self->raw_length *
   352 		return NULL;
   304 					       sizeof(*self->offsets));
       
   305 			if (self->offsets == NULL)
       
   306 				return PyErr_NoMemory();
       
   307 			inline_scan(self, self->offsets);
       
   308 		}
       
   309 		data = self->offsets[pos];
       
   310 	} else
       
   311 		data = PyString_AS_STRING(self->data) + pos * 64;
       
   312 
   353 
   313 	memcpy(decode, data, 8 * sizeof(uint32_t));
   354 	memcpy(decode, data, 8 * sizeof(uint32_t));
   314 
   355 
   315 	offset_flags = ntohl(decode[1]);
   356 	offset_flags = ntohl(decode[1]);
   316 	if (pos == 0) /* mask out version number for the first entry */
   357 	if (pos == 0) /* mask out version number for the first entry */
   339 	Py_INCREF(entry);
   380 	Py_INCREF(entry);
   340 
   381 
   341 	return entry;
   382 	return entry;
   342 }
   383 }
   343 
   384 
       
   385 /*
       
   386  * Return the 20-byte SHA of the node corresponding to the given rev.
       
   387  */
       
   388 static const char *index_node(indexObject *self, Py_ssize_t pos)
       
   389 {
       
   390 	Py_ssize_t length = index_length(self);
       
   391 	const char *data;
       
   392 
       
   393 	if (pos == length - 1)
       
   394 		return nullid;
       
   395 
       
   396 	if (pos >= length)
       
   397 		return NULL;
       
   398 
       
   399 	if (pos >= self->length - 1) {
       
   400 		PyObject *tuple, *str;
       
   401 		tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
       
   402 		str = PyTuple_GetItem(tuple, 7);
       
   403 		return str ? PyString_AS_STRING(str) : NULL;
       
   404 	}
       
   405 
       
   406 	data = index_deref(self, pos);
       
   407 	return data ? data + 32 : NULL;
       
   408 }
       
   409 
       
   410 static int nt_insert(indexObject *self, const char *node, int rev);
       
   411 
       
   412 static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
       
   413 {
       
   414 	if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
       
   415 		return -1;
       
   416 	if (*nodelen == 20)
       
   417 		return 0;
       
   418 	PyErr_SetString(PyExc_ValueError, "20-byte hash required");
       
   419 	return -1;
       
   420 }
       
   421 
   344 static PyObject *index_insert(indexObject *self, PyObject *args)
   422 static PyObject *index_insert(indexObject *self, PyObject *args)
   345 {
   423 {
   346 	PyObject *obj, *node;
   424 	PyObject *obj;
       
   425 	char *node;
   347 	long offset;
   426 	long offset;
   348 	Py_ssize_t len;
   427 	Py_ssize_t len, nodelen;
   349 
   428 
   350 	if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
   429 	if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
   351 		return NULL;
   430 		return NULL;
   352 
   431 
   353 	if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
   432 	if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
   354 		PyErr_SetString(PyExc_ValueError, "8-tuple required");
   433 		PyErr_SetString(PyExc_TypeError, "8-tuple required");
   355 		return NULL;
   434 		return NULL;
   356 	}
   435 	}
   357 
   436 
   358 	node = PyTuple_GET_ITEM(obj, 7);
   437 	if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
   359 	if (!PyString_Check(node) || PyString_GET_SIZE(node) != 20) {
   438 		return NULL;
   360 		PyErr_SetString(PyExc_ValueError,
       
   361 				"20-byte hash required as last element");
       
   362 		return NULL;
       
   363 	}
       
   364 
   439 
   365 	len = index_length(self);
   440 	len = index_length(self);
   366 
   441 
   367 	if (offset < 0)
   442 	if (offset < 0)
   368 		offset += len;
   443 		offset += len;
   369 
   444 
   370 	if (offset != len - 1) {
   445 	if (offset != len - 1) {
   371 		PyErr_SetString(PyExc_IndexError,
   446 		PyErr_SetString(PyExc_IndexError,
   372 				"insert only supported at index -1");
   447 				"insert only supported at index -1");
       
   448 		return NULL;
       
   449 	}
       
   450 
       
   451 	if (offset > INT_MAX) {
       
   452 		PyErr_SetString(PyExc_ValueError,
       
   453 				"currently only 2**31 revs supported");
   373 		return NULL;
   454 		return NULL;
   374 	}
   455 	}
   375 
   456 
   376 	if (self->added == NULL) {
   457 	if (self->added == NULL) {
   377 		self->added = PyList_New(0);
   458 		self->added = PyList_New(0);
   380 	}
   461 	}
   381 
   462 
   382 	if (PyList_Append(self->added, obj) == -1)
   463 	if (PyList_Append(self->added, obj) == -1)
   383 		return NULL;
   464 		return NULL;
   384 
   465 
       
   466 	if (self->nt)
       
   467 		nt_insert(self, node, (int)offset);
       
   468 
   385 	Py_RETURN_NONE;
   469 	Py_RETURN_NONE;
   386 }
   470 }
   387 
   471 
   388 static void _index_clearcaches(indexObject *self)
   472 static void _index_clearcaches(indexObject *self)
   389 {
   473 {
   399 	}
   483 	}
   400 	if (self->offsets) {
   484 	if (self->offsets) {
   401 		free(self->offsets);
   485 		free(self->offsets);
   402 		self->offsets = NULL;
   486 		self->offsets = NULL;
   403 	}
   487 	}
       
   488 	if (self->nt) {
       
   489 		free(self->nt);
       
   490 		self->nt = NULL;
       
   491 	}
   404 }
   492 }
   405 
   493 
   406 static PyObject *index_clearcaches(indexObject *self)
   494 static PyObject *index_clearcaches(indexObject *self)
   407 {
   495 {
   408 	_index_clearcaches(self);
   496 	_index_clearcaches(self);
       
   497 	self->ntlength = self->ntcapacity = 0;
       
   498 	self->ntdepth = self->ntsplits = 0;
       
   499 	self->ntrev = -1;
       
   500 	self->ntlookups = self->ntmisses = 0;
   409 	Py_RETURN_NONE;
   501 	Py_RETURN_NONE;
   410 }
   502 }
   411 
   503 
   412 static int index_assign_subscript(indexObject *self, PyObject *item,
   504 static PyObject *index_stats(indexObject *self)
   413 				  PyObject *value)
   505 {
       
   506 	PyObject *obj = PyDict_New();
       
   507 
       
   508 	if (obj == NULL)
       
   509 		return NULL;
       
   510 
       
   511 #define istat(__n, __d) \
       
   512 	if (PyDict_SetItemString(obj, __d, PyInt_FromLong(self->__n)) == -1) \
       
   513 		goto bail;
       
   514 
       
   515 	if (self->added) {
       
   516 		Py_ssize_t len = PyList_GET_SIZE(self->added);
       
   517 		if (PyDict_SetItemString(obj, "index entries added",
       
   518 					 PyInt_FromLong(len)) == -1)
       
   519 			goto bail;
       
   520 	}
       
   521 
       
   522 	if (self->raw_length != self->length - 1)
       
   523 		istat(raw_length, "revs on disk");
       
   524 	istat(length, "revs in memory");
       
   525 	istat(ntcapacity, "node trie capacity");
       
   526 	istat(ntdepth, "node trie depth");
       
   527 	istat(ntlength, "node trie count");
       
   528 	istat(ntlookups, "node trie lookups");
       
   529 	istat(ntmisses, "node trie misses");
       
   530 	istat(ntrev, "node trie last rev scanned");
       
   531 	istat(ntsplits, "node trie splits");
       
   532 
       
   533 #undef istat
       
   534 
       
   535 	return obj;
       
   536 
       
   537 bail:
       
   538 	Py_XDECREF(obj);
       
   539 	return NULL;
       
   540 }
       
   541 
       
   542 static inline int nt_level(const char *node, int level)
       
   543 {
       
   544 	int v = node[level>>1];
       
   545 	if (!(level & 1))
       
   546 		v >>= 4;
       
   547 	return v & 0xf;
       
   548 }
       
   549 
       
   550 static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen)
       
   551 {
       
   552 	int level, off;
       
   553 
       
   554 	if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
       
   555 		return -1;
       
   556 
       
   557 	if (self->nt == NULL)
       
   558 		return -2;
       
   559 
       
   560 	for (level = off = 0; level < nodelen; level++) {
       
   561 		int k = nt_level(node, level);
       
   562 		nodetree *n = &self->nt[off];
       
   563 		int v = n->children[k];
       
   564 
       
   565 		if (v < 0) {
       
   566 			const char *n;
       
   567 			v = -v - 1;
       
   568 			n = index_node(self, v);
       
   569 			if (n == NULL)
       
   570 				return -2;
       
   571 			return memcmp(node, n, nodelen > 20 ? 20 : nodelen)
       
   572 				? -2 : v;
       
   573 		}
       
   574 		if (v == 0)
       
   575 			return -2;
       
   576 		off = v;
       
   577 	}
       
   578 	return -2;
       
   579 }
       
   580 
       
   581 static int nt_new(indexObject *self)
       
   582 {
       
   583 	if (self->ntlength == self->ntcapacity) {
       
   584 		self->ntcapacity *= 2;
       
   585 		self->nt = realloc(self->nt,
       
   586 				   self->ntcapacity * sizeof(nodetree));
       
   587 		if (self->nt == NULL) {
       
   588 			PyErr_SetString(PyExc_MemoryError, "out of memory");
       
   589 			return -1;
       
   590 		}
       
   591 		memset(&self->nt[self->ntlength], 0,
       
   592 		       sizeof(nodetree) * (self->ntcapacity - self->ntlength));
       
   593 	}
       
   594 	return self->ntlength++;
       
   595 }
       
   596 
       
   597 static int nt_insert(indexObject *self, const char *node, int rev)
       
   598 {
       
   599 	int level = 0;
       
   600 	int off = 0;
       
   601 
       
   602 	while (level < 20) {
       
   603 		int k = nt_level(node, level);
       
   604 		nodetree *n;
       
   605 		int v;
       
   606 
       
   607 		n = &self->nt[off];
       
   608 		v = n->children[k];
       
   609 
       
   610 		if (v == 0) {
       
   611 			n->children[k] = -rev - 1;
       
   612 			return 0;
       
   613 		}
       
   614 		if (v < 0) {
       
   615 			const char *oldnode = index_node(self, -v - 1);
       
   616 			int noff;
       
   617 
       
   618 			if (!oldnode || !memcmp(oldnode, node, 20)) {
       
   619 				n->children[k] = -rev - 1;
       
   620 				return 0;
       
   621 			}
       
   622 			noff = nt_new(self);
       
   623 			if (noff == -1)
       
   624 				return -1;
       
   625 			/* self->nt may have been changed by realloc */
       
   626 			self->nt[off].children[k] = noff;
       
   627 			off = noff;
       
   628 			n = &self->nt[off];
       
   629 			n->children[nt_level(oldnode, ++level)] = v;
       
   630 			if (level > self->ntdepth)
       
   631 				self->ntdepth = level;
       
   632 			self->ntsplits += 1;
       
   633 		} else {
       
   634 			level += 1;
       
   635 			off = v;
       
   636 		}
       
   637 	}
       
   638 
       
   639 	return -1;
       
   640 }
       
   641 
       
   642 /*
       
   643  * Return values:
       
   644  *
       
   645  *   -3: error (exception set)
       
   646  *   -2: not found (no exception set)
       
   647  * rest: valid rev
       
   648  */
       
   649 static int index_find_node(indexObject *self,
       
   650 			   const char *node, Py_ssize_t nodelen)
       
   651 {
       
   652 	int rev;
       
   653 
       
   654 	self->ntlookups++;
       
   655 	rev = nt_find(self, node, nodelen);
       
   656 	if (rev >= -1)
       
   657 		return rev;
       
   658 
       
   659 	if (self->nt == NULL) {
       
   660 		self->ntcapacity = self->raw_length < 4
       
   661 			? 4 : self->raw_length / 2;
       
   662 		self->nt = calloc(self->ntcapacity, sizeof(nodetree));
       
   663 		if (self->nt == NULL) {
       
   664 			PyErr_SetString(PyExc_MemoryError, "out of memory");
       
   665 			return -3;
       
   666 		}
       
   667 		self->ntlength = 1;
       
   668 		self->ntrev = (int)index_length(self) - 1;
       
   669 		self->ntlookups = 1;
       
   670 		self->ntmisses = 0;
       
   671 	}
       
   672 
       
   673 	/*
       
   674 	 * For the first handful of lookups, we scan the entire index,
       
   675 	 * and cache only the matching nodes. This optimizes for cases
       
   676 	 * like "hg tip", where only a few nodes are accessed.
       
   677 	 *
       
   678 	 * After that, we cache every node we visit, using a single
       
   679 	 * scan amortized over multiple lookups.  This gives the best
       
   680 	 * bulk performance, e.g. for "hg log".
       
   681 	 */
       
   682 	if (self->ntmisses++ < 4) {
       
   683 		for (rev = self->ntrev - 1; rev >= 0; rev--) {
       
   684 			const char *n = index_node(self, rev);
       
   685 			if (n == NULL)
       
   686 				return -2;
       
   687 			if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
       
   688 				if (nt_insert(self, n, rev) == -1)
       
   689 					return -3;
       
   690 				break;
       
   691 			}
       
   692 		}
       
   693 	} else {
       
   694 		for (rev = self->ntrev - 1; rev >= 0; rev--) {
       
   695 			const char *n = index_node(self, rev);
       
   696 			if (n == NULL)
       
   697 				return -2;
       
   698 			if (nt_insert(self, n, rev) == -1)
       
   699 				return -3;
       
   700 			if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
       
   701 				break;
       
   702 			}
       
   703 		}
       
   704 		self->ntrev = rev;
       
   705 	}
       
   706 
       
   707 	if (rev >= 0)
       
   708 		return rev;
       
   709 	return -2;
       
   710 }
       
   711 
       
   712 static PyObject *raise_revlog_error(void)
       
   713 {
       
   714 	static PyObject *errclass;
       
   715 	PyObject *mod = NULL, *errobj;
       
   716 
       
   717 	if (errclass == NULL) {
       
   718 		PyObject *dict;
       
   719 
       
   720 		mod = PyImport_ImportModule("mercurial.error");
       
   721 		if (mod == NULL)
       
   722 			goto classfail;
       
   723 
       
   724 		dict = PyModule_GetDict(mod);
       
   725 		if (dict == NULL)
       
   726 			goto classfail;
       
   727 
       
   728 		errclass = PyDict_GetItemString(dict, "RevlogError");
       
   729 		if (errclass == NULL) {
       
   730 			PyErr_SetString(PyExc_SystemError,
       
   731 					"could not find RevlogError");
       
   732 			goto classfail;
       
   733 		}
       
   734 		Py_INCREF(errclass);
       
   735 	}
       
   736 
       
   737 	errobj = PyObject_CallFunction(errclass, NULL);
       
   738 	if (errobj == NULL)
       
   739 		return NULL;
       
   740 	PyErr_SetObject(errclass, errobj);
       
   741 	return errobj;
       
   742 
       
   743 classfail:
       
   744 	Py_XDECREF(mod);
       
   745 	return NULL;
       
   746 }
       
   747 
       
   748 static PyObject *index_getitem(indexObject *self, PyObject *value)
       
   749 {
       
   750 	char *node;
       
   751 	Py_ssize_t nodelen;
       
   752 	int rev;
       
   753 
       
   754 	if (PyInt_Check(value))
       
   755 		return index_get(self, PyInt_AS_LONG(value));
       
   756 
       
   757 	if (PyString_AsStringAndSize(value, &node, &nodelen) == -1)
       
   758 		return NULL;
       
   759 	rev = index_find_node(self, node, nodelen);
       
   760 	if (rev >= -1)
       
   761 		return PyInt_FromLong(rev);
       
   762 	if (rev == -2)
       
   763 		raise_revlog_error();
       
   764 	return NULL;
       
   765 }
       
   766 
       
   767 static PyObject *index_m_get(indexObject *self, PyObject *args)
       
   768 {
       
   769 	char *node;
       
   770 	int nodelen, rev;
       
   771 
       
   772 	if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
       
   773 		return NULL;
       
   774 
       
   775 	rev = index_find_node(self, node, nodelen);
       
   776 	if (rev ==  -3)
       
   777 		return NULL;
       
   778 	if (rev == -2)
       
   779 		Py_RETURN_NONE;
       
   780 	return PyInt_FromLong(rev);
       
   781 }
       
   782 
       
   783 static int index_contains(indexObject *self, PyObject *value)
       
   784 {
       
   785 	char *node;
       
   786 	Py_ssize_t nodelen;
       
   787 
       
   788 	if (PyInt_Check(value)) {
       
   789 		long rev = PyInt_AS_LONG(value);
       
   790 		return rev >= -1 && rev < index_length(self);
       
   791 	}
       
   792 
       
   793 	if (!PyString_Check(value))
       
   794 		return 0;
       
   795 
       
   796 	node = PyString_AS_STRING(value);
       
   797 	nodelen = PyString_GET_SIZE(value);
       
   798 
       
   799 	switch (index_find_node(self, node, nodelen)) {
       
   800 	case -3:
       
   801 		return -1;
       
   802 	case -2:
       
   803 		return 0;
       
   804 	default:
       
   805 		return 1;
       
   806 	}
       
   807 }
       
   808 
       
   809 /*
       
   810  * Invalidate any trie entries introduced by added revs.
       
   811  */
       
   812 static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
       
   813 {
       
   814 	Py_ssize_t i, len = PyList_GET_SIZE(self->added);
       
   815 
       
   816 	for (i = start; i < len; i++) {
       
   817 		PyObject *tuple = PyList_GET_ITEM(self->added, i);
       
   818 		PyObject *node = PyTuple_GET_ITEM(tuple, 7);
       
   819 
       
   820 		nt_insert(self, PyString_AS_STRING(node), -1);
       
   821 	}
       
   822 
       
   823 	if (start == 0) {
       
   824 		Py_DECREF(self->added);
       
   825 		self->added = NULL;
       
   826 	}
       
   827 }
       
   828 
       
   829 /*
       
   830  * Delete a numeric range of revs, which must be at the end of the
       
   831  * range, but exclude the sentinel nullid entry.
       
   832  */
       
   833 static int index_slice_del(indexObject *self, PyObject *item)
   414 {
   834 {
   415 	Py_ssize_t start, stop, step, slicelength;
   835 	Py_ssize_t start, stop, step, slicelength;
   416 	Py_ssize_t length = index_length(self);
   836 	Py_ssize_t length = index_length(self);
   417 
       
   418 	if (!PySlice_Check(item) || value != NULL) {
       
   419 		PyErr_SetString(PyExc_TypeError,
       
   420 				"revlog index only supports slice deletion");
       
   421 		return -1;
       
   422 	}
       
   423 
   837 
   424 	if (PySlice_GetIndicesEx((PySliceObject*)item, length,
   838 	if (PySlice_GetIndicesEx((PySliceObject*)item, length,
   425 				 &start, &stop, &step, &slicelength) < 0)
   839 				 &start, &stop, &step, &slicelength) < 0)
   426 		return -1;
   840 		return -1;
   427 
   841 
   447 		PyErr_SetString(PyExc_IndexError,
   861 		PyErr_SetString(PyExc_IndexError,
   448 				"revlog index deletion indices are invalid");
   862 				"revlog index deletion indices are invalid");
   449 		return -1;
   863 		return -1;
   450 	}
   864 	}
   451 
   865 
   452 	if (start < self->length) {
   866 	if (start < self->length - 1) {
       
   867 		if (self->nt) {
       
   868 			Py_ssize_t i;
       
   869 
       
   870 			for (i = start + 1; i < self->length - 1; i++) {
       
   871 				const char *node = index_node(self, i);
       
   872 
       
   873 				if (node)
       
   874 					nt_insert(self, node, -1);
       
   875 			}
       
   876 			if (self->added)
       
   877 				nt_invalidate_added(self, 0);
       
   878 			if (self->ntrev > start)
       
   879 				self->ntrev = (int)start;
       
   880 		}
   453 		self->length = start + 1;
   881 		self->length = start + 1;
   454 		if (self->added) {
       
   455 			Py_DECREF(self->added);
       
   456 			self->added = NULL;
       
   457 		}
       
   458 		return 0;
   882 		return 0;
   459 	}
   883 	}
   460 
   884 
   461 	return PyList_SetSlice(self->added, start - self->length + 1,
   885 	if (self->nt) {
   462 			       PyList_GET_SIZE(self->added),
   886 		nt_invalidate_added(self, start - self->length + 1);
   463 			       NULL);
   887 		if (self->ntrev > start)
   464 }
   888 			self->ntrev = (int)start;
   465 
   889 	}
       
   890 	return self->added
       
   891 		? PyList_SetSlice(self->added, start - self->length + 1,
       
   892 				  PyList_GET_SIZE(self->added), NULL)
       
   893 		: 0;
       
   894 }
       
   895 
       
   896 /*
       
   897  * Supported ops:
       
   898  *
       
   899  * slice deletion
       
   900  * string assignment (extend node->rev mapping)
       
   901  * string deletion (shrink node->rev mapping)
       
   902  */
       
   903 static int index_assign_subscript(indexObject *self, PyObject *item,
       
   904 				  PyObject *value)
       
   905 {
       
   906 	char *node;
       
   907 	Py_ssize_t nodelen;
       
   908 	long rev;
       
   909 
       
   910 	if (PySlice_Check(item) && value == NULL)
       
   911 		return index_slice_del(self, item);
       
   912 
       
   913 	if (node_check(item, &node, &nodelen) == -1)
       
   914 		return -1;
       
   915 
       
   916 	if (value == NULL)
       
   917 		return self->nt ? nt_insert(self, node, -1) : 0;
       
   918 	rev = PyInt_AsLong(value);
       
   919 	if (rev > INT_MAX || rev < 0) {
       
   920 		if (!PyErr_Occurred())
       
   921 			PyErr_SetString(PyExc_ValueError, "rev out of range");
       
   922 		return -1;
       
   923 	}
       
   924 	return nt_insert(self, node, (int)rev);
       
   925 }
       
   926 
       
   927 /*
       
   928  * Find all RevlogNG entries in an index that has inline data. Update
       
   929  * the optional "offsets" table with those entries.
       
   930  */
   466 static long inline_scan(indexObject *self, const char **offsets)
   931 static long inline_scan(indexObject *self, const char **offsets)
   467 {
   932 {
   468 	const char *data = PyString_AS_STRING(self->data);
   933 	const char *data = PyString_AS_STRING(self->data);
   469 	const char *end = data + PyString_GET_SIZE(self->data);
   934 	const char *end = data + PyString_GET_SIZE(self->data);
   470 	const long hdrsize = 64;
   935 	const long hdrsize = 64;
   504 	self->data = data_obj;
   969 	self->data = data_obj;
   505 	self->cache = NULL;
   970 	self->cache = NULL;
   506 
   971 
   507 	self->added = NULL;
   972 	self->added = NULL;
   508 	self->offsets = NULL;
   973 	self->offsets = NULL;
       
   974 	self->nt = NULL;
       
   975 	self->ntlength = self->ntcapacity = 0;
       
   976 	self->ntdepth = self->ntsplits = 0;
       
   977 	self->ntlookups = self->ntmisses = 0;
       
   978 	self->ntrev = -1;
   509 	Py_INCREF(self->data);
   979 	Py_INCREF(self->data);
   510 
   980 
   511 	if (self->inlined) {
   981 	if (self->inlined) {
   512 		long len = inline_scan(self, NULL);
   982 		long len = inline_scan(self, NULL);
   513 		if (len == -1)
   983 		if (len == -1)
   539 
  1009 
   540 	return index_real_init(self, data, size, inlined_obj,
  1010 	return index_real_init(self, data, size, inlined_obj,
   541 			       PyTuple_GET_ITEM(args, 0));
  1011 			       PyTuple_GET_ITEM(args, 0));
   542 }
  1012 }
   543 
  1013 
       
  1014 static PyObject *index_nodemap(indexObject *self)
       
  1015 {
       
  1016 	return (PyObject *)self;
       
  1017 }
       
  1018 
   544 static void index_dealloc(indexObject *self)
  1019 static void index_dealloc(indexObject *self)
   545 {
  1020 {
   546 	_index_clearcaches(self);
  1021 	_index_clearcaches(self);
   547 	Py_DECREF(self->data);
  1022 	Py_DECREF(self->data);
   548 	Py_XDECREF(self->added);
  1023 	Py_XDECREF(self->added);
   552 static PySequenceMethods index_sequence_methods = {
  1027 static PySequenceMethods index_sequence_methods = {
   553 	(lenfunc)index_length,   /* sq_length */
  1028 	(lenfunc)index_length,   /* sq_length */
   554 	0,                       /* sq_concat */
  1029 	0,                       /* sq_concat */
   555 	0,                       /* sq_repeat */
  1030 	0,                       /* sq_repeat */
   556 	(ssizeargfunc)index_get, /* sq_item */
  1031 	(ssizeargfunc)index_get, /* sq_item */
       
  1032 	0,                       /* sq_slice */
       
  1033 	0,                       /* sq_ass_item */
       
  1034 	0,                       /* sq_ass_slice */
       
  1035 	(objobjproc)index_contains, /* sq_contains */
   557 };
  1036 };
   558 
  1037 
   559 static PyMappingMethods index_mapping_methods = {
  1038 static PyMappingMethods index_mapping_methods = {
   560 	(lenfunc)index_length,                 /* mp_length */
  1039 	(lenfunc)index_length,                 /* mp_length */
   561 	NULL,                                  /* mp_subscript */
  1040 	(binaryfunc)index_getitem,             /* mp_subscript */
   562 	(objobjargproc)index_assign_subscript, /* mp_ass_subscript */
  1041 	(objobjargproc)index_assign_subscript, /* mp_ass_subscript */
   563 };
  1042 };
   564 
  1043 
   565 static PyMethodDef index_methods[] = {
  1044 static PyMethodDef index_methods[] = {
   566 	{"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
  1045 	{"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
   567 	 "clear the index caches"},
  1046 	 "clear the index caches"},
       
  1047 	{"get", (PyCFunction)index_m_get, METH_VARARGS,
       
  1048 	 "get an index entry"},
   568 	{"insert", (PyCFunction)index_insert, METH_VARARGS,
  1049 	{"insert", (PyCFunction)index_insert, METH_VARARGS,
   569 	 "insert an index entry"},
  1050 	 "insert an index entry"},
       
  1051 	{"stats", (PyCFunction)index_stats, METH_NOARGS,
       
  1052 	 "stats for the index"},
       
  1053 	{NULL} /* Sentinel */
       
  1054 };
       
  1055 
       
  1056 static PyGetSetDef index_getset[] = {
       
  1057 	{"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
   570 	{NULL} /* Sentinel */
  1058 	{NULL} /* Sentinel */
   571 };
  1059 };
   572 
  1060 
   573 static PyTypeObject indexType = {
  1061 static PyTypeObject indexType = {
   574 	PyObject_HEAD_INIT(NULL)
  1062 	PyObject_HEAD_INIT(NULL)
   599 	0,                         /* tp_weaklistoffset */
  1087 	0,                         /* tp_weaklistoffset */
   600 	0,                         /* tp_iter */
  1088 	0,                         /* tp_iter */
   601 	0,                         /* tp_iternext */
  1089 	0,                         /* tp_iternext */
   602 	index_methods,             /* tp_methods */
  1090 	index_methods,             /* tp_methods */
   603 	0,                         /* tp_members */
  1091 	0,                         /* tp_members */
   604 	0,                         /* tp_getset */
  1092 	index_getset,              /* tp_getset */
   605 	0,                         /* tp_base */
  1093 	0,                         /* tp_base */
   606 	0,                         /* tp_dict */
  1094 	0,                         /* tp_dict */
   607 	0,                         /* tp_descr_get */
  1095 	0,                         /* tp_descr_get */
   608 	0,                         /* tp_descr_set */
  1096 	0,                         /* tp_descr_set */
   609 	0,                         /* tp_dictoffset */
  1097 	0,                         /* tp_dictoffset */
   611 	0,                         /* tp_alloc */
  1099 	0,                         /* tp_alloc */
   612 	PyType_GenericNew,         /* tp_new */
  1100 	PyType_GenericNew,         /* tp_new */
   613 };
  1101 };
   614 
  1102 
   615 /*
  1103 /*
   616  * returns a tuple of the form (index, None, cache) with elements as
  1104  * returns a tuple of the form (index, index, cache) with elements as
   617  * follows:
  1105  * follows:
   618  *
  1106  *
   619  * index: an index object that lazily parses the RevlogNG records
  1107  * index: an index object that lazily parses RevlogNG records
   620  * cache: if data is inlined, a tuple (index_file_content, 0), else None
  1108  * cache: if data is inlined, a tuple (index_file_content, 0), else None
   621  *
  1109  *
   622  * added complications are for backwards compatibility
  1110  * added complications are for backwards compatibility
   623  */
  1111  */
   624 static PyObject *parse_index2(PyObject *self, PyObject *args)
  1112 static PyObject *parse_index2(PyObject *self, PyObject *args)
   649 	} else {
  1137 	} else {
   650 		cache = Py_None;
  1138 		cache = Py_None;
   651 		Py_INCREF(cache);
  1139 		Py_INCREF(cache);
   652 	}
  1140 	}
   653 
  1141 
       
  1142 	Py_INCREF(idx);
       
  1143 
   654 	tuple = Py_BuildValue("NN", idx, cache);
  1144 	tuple = Py_BuildValue("NN", idx, cache);
   655 	if (!tuple)
  1145 	if (!tuple)
   656 		goto bail;
  1146 		goto bail;
   657 	return tuple;
  1147 	return tuple;
   658 
  1148 
   672 	{NULL, NULL}
  1162 	{NULL, NULL}
   673 };
  1163 };
   674 
  1164 
   675 static void module_init(PyObject *mod)
  1165 static void module_init(PyObject *mod)
   676 {
  1166 {
   677 	static const char nullid[20];
       
   678 
       
   679 	if (PyType_Ready(&indexType) < 0)
  1167 	if (PyType_Ready(&indexType) < 0)
   680 		return;
  1168 		return;
   681 	Py_INCREF(&indexType);
  1169 	Py_INCREF(&indexType);
   682 
  1170 
   683 	PyModule_AddObject(mod, "index", (PyObject *)&indexType);
  1171 	PyModule_AddObject(mod, "index", (PyObject *)&indexType);
   708 {
  1196 {
   709 	PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
  1197 	PyObject *mod = Py_InitModule3("parsers", methods, parsers_doc);
   710 	module_init(mod);
  1198 	module_init(mod);
   711 }
  1199 }
   712 #endif
  1200 #endif
   713