mercurial/cext/revlog.c
changeset 46708 358737abeeef
parent 46328 0216abfb2d3e
child 46717 502e795b55ac
equal deleted inserted replaced
46707:eed42f1c22d6 46708:358737abeeef
    96 	int ntinitialized;      /* 0 or 1 */
    96 	int ntinitialized;      /* 0 or 1 */
    97 	int ntrev;              /* last rev scanned */
    97 	int ntrev;              /* last rev scanned */
    98 	int ntlookups;          /* # lookups */
    98 	int ntlookups;          /* # lookups */
    99 	int ntmisses;           /* # lookups that miss the cache */
    99 	int ntmisses;           /* # lookups that miss the cache */
   100 	int inlined;
   100 	int inlined;
       
   101 	long hdrsize; /* size of index headers. Differs in v1 vs. v2 format */
   101 };
   102 };
   102 
   103 
   103 static Py_ssize_t index_length(const indexObject *self)
   104 static Py_ssize_t index_length(const indexObject *self)
   104 {
   105 {
   105 	return self->length + self->new_length;
   106 	return self->length + self->new_length;
   111 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
   112 static Py_ssize_t inline_scan(indexObject *self, const char **offsets);
   112 
   113 
   113 static int index_find_node(indexObject *self, const char *node);
   114 static int index_find_node(indexObject *self, const char *node);
   114 
   115 
   115 #if LONG_MAX == 0x7fffffffL
   116 #if LONG_MAX == 0x7fffffffL
   116 static const char *const tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
   117 static const char *const v1_tuple_format = PY23("Kiiiiiis#", "Kiiiiiiy#");
       
   118 static const char *const v2_tuple_format =
       
   119     PY23("Kiiiiiis#Ki", "Kiiiiiiy#Ki");
   117 #else
   120 #else
   118 static const char *const tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
   121 static const char *const v1_tuple_format = PY23("kiiiiiis#", "kiiiiiiy#");
       
   122 static const char *const v2_tuple_format =
       
   123     PY23("kiiiiiis#ki", "kiiiiiiy#ki");
   119 #endif
   124 #endif
   120 
   125 
   121 /* A RevlogNG v1 index entry is 64 bytes long. */
   126 /* A RevlogNG v1 index entry is 64 bytes long. */
   122 static const long v1_hdrsize = 64;
   127 static const long v1_hdrsize = 64;
       
   128 
       
   129 /* A Revlogv2 index entry is 96 bytes long. */
       
   130 static const long v2_hdrsize = 96;
   123 
   131 
   124 static void raise_revlog_error(void)
   132 static void raise_revlog_error(void)
   125 {
   133 {
   126 	PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
   134 	PyObject *mod = NULL, *dict = NULL, *errclass = NULL;
   127 
   135 
   155  * Return a pointer to the beginning of a RevlogNG record.
   163  * Return a pointer to the beginning of a RevlogNG record.
   156  */
   164  */
   157 static const char *index_deref(indexObject *self, Py_ssize_t pos)
   165 static const char *index_deref(indexObject *self, Py_ssize_t pos)
   158 {
   166 {
   159 	if (pos >= self->length)
   167 	if (pos >= self->length)
   160 		return self->added + (pos - self->length) * v1_hdrsize;
   168 		return self->added + (pos - self->length) * self->hdrsize;
   161 
   169 
   162 	if (self->inlined && pos > 0) {
   170 	if (self->inlined && pos > 0) {
   163 		if (self->offsets == NULL) {
   171 		if (self->offsets == NULL) {
   164 			Py_ssize_t ret;
   172 			Py_ssize_t ret;
   165 			self->offsets =
   173 			self->offsets =
   172 			};
   180 			};
   173 		}
   181 		}
   174 		return self->offsets[pos];
   182 		return self->offsets[pos];
   175 	}
   183 	}
   176 
   184 
   177 	return (const char *)(self->buf.buf) + pos * v1_hdrsize;
   185 	return (const char *)(self->buf.buf) + pos * self->hdrsize;
   178 }
   186 }
   179 
   187 
   180 /*
   188 /*
   181  * Get parents of the given rev.
   189  * Get parents of the given rev.
   182  *
   190  *
   278  *    4 bytes: parent 2 revision
   286  *    4 bytes: parent 2 revision
   279  *   32 bytes: nodeid (only 20 bytes used with SHA-1)
   287  *   32 bytes: nodeid (only 20 bytes used with SHA-1)
   280  */
   288  */
   281 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
   289 static PyObject *index_get(indexObject *self, Py_ssize_t pos)
   282 {
   290 {
   283 	uint64_t offset_flags;
   291 	uint64_t offset_flags, sidedata_offset;
   284 	int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
   292 	int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
       
   293 	    sidedata_comp_len;
   285 	const char *c_node_id;
   294 	const char *c_node_id;
   286 	const char *data;
   295 	const char *data;
   287 	Py_ssize_t length = index_length(self);
   296 	Py_ssize_t length = index_length(self);
   288 
   297 
   289 	if (pos == nullrev) {
   298 	if (pos == nullrev) {
   318 	link_rev = getbe32(data + 20);
   327 	link_rev = getbe32(data + 20);
   319 	parent_1 = getbe32(data + 24);
   328 	parent_1 = getbe32(data + 24);
   320 	parent_2 = getbe32(data + 28);
   329 	parent_2 = getbe32(data + 28);
   321 	c_node_id = data + 32;
   330 	c_node_id = data + 32;
   322 
   331 
   323 	return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
   332 	if (self->hdrsize == v1_hdrsize) {
   324 	                     base_rev, link_rev, parent_1, parent_2, c_node_id,
   333 		return Py_BuildValue(v1_tuple_format, offset_flags, comp_len,
   325 	                     self->nodelen);
   334 		                     uncomp_len, base_rev, link_rev, parent_1,
       
   335 		                     parent_2, c_node_id, self->nodelen);
       
   336 	} else {
       
   337 		sidedata_offset = getbe64(data + 64);
       
   338 		sidedata_comp_len = getbe32(data + 72);
       
   339 
       
   340 		return Py_BuildValue(v2_tuple_format, offset_flags, comp_len,
       
   341 		                     uncomp_len, base_rev, link_rev, parent_1,
       
   342 		                     parent_2, c_node_id, self->nodelen,
       
   343 		                     sidedata_offset, sidedata_comp_len);
       
   344 	}
   326 }
   345 }
   327 
   346 
   328 /*
   347 /*
   329  * Return the hash of node corresponding to the given rev.
   348  * Return the hash of node corresponding to the given rev.
   330  */
   349  */
   371 	return -1;
   390 	return -1;
   372 }
   391 }
   373 
   392 
   374 static PyObject *index_append(indexObject *self, PyObject *obj)
   393 static PyObject *index_append(indexObject *self, PyObject *obj)
   375 {
   394 {
   376 	uint64_t offset_flags;
   395 	uint64_t offset_flags, sidedata_offset;
   377 	int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
   396 	int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
   378 	Py_ssize_t c_node_id_len;
   397 	Py_ssize_t c_node_id_len, sidedata_comp_len;
   379 	const char *c_node_id;
   398 	const char *c_node_id;
   380 	char *data;
   399 	char *data;
   381 
   400 
   382 	if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len,
   401 	if (self->hdrsize == v1_hdrsize) {
   383 	                      &uncomp_len, &base_rev, &link_rev, &parent_1,
   402 		if (!PyArg_ParseTuple(obj, v1_tuple_format, &offset_flags,
   384 	                      &parent_2, &c_node_id, &c_node_id_len)) {
   403 		                      &comp_len, &uncomp_len, &base_rev,
   385 		PyErr_SetString(PyExc_TypeError, "8-tuple required");
   404 		                      &link_rev, &parent_1, &parent_2,
   386 		return NULL;
   405 		                      &c_node_id, &c_node_id_len)) {
   387 	}
   406 			PyErr_SetString(PyExc_TypeError, "8-tuple required");
       
   407 			return NULL;
       
   408 		}
       
   409 	} else {
       
   410 		if (!PyArg_ParseTuple(
       
   411 		        obj, v2_tuple_format, &offset_flags, &comp_len,
       
   412 		        &uncomp_len, &base_rev, &link_rev, &parent_1, &parent_2,
       
   413 		        &c_node_id, &c_node_id_len, &sidedata_offset, &sidedata_comp_len)) {
       
   414 			PyErr_SetString(PyExc_TypeError, "10-tuple required");
       
   415 			return NULL;
       
   416 		}
       
   417 	}
       
   418 
   388 	if (c_node_id_len != self->nodelen) {
   419 	if (c_node_id_len != self->nodelen) {
   389 		PyErr_SetString(PyExc_TypeError, "invalid node");
   420 		PyErr_SetString(PyExc_TypeError, "invalid node");
   390 		return NULL;
   421 		return NULL;
   391 	}
   422 	}
   392 
   423 
   393 	if (self->new_length == self->added_length) {
   424 	if (self->new_length == self->added_length) {
   394 		size_t new_added_length =
   425 		size_t new_added_length =
   395 		    self->added_length ? self->added_length * 2 : 4096;
   426 		    self->added_length ? self->added_length * 2 : 4096;
   396 		void *new_added =
   427 		void *new_added = PyMem_Realloc(self->added, new_added_length *
   397 		    PyMem_Realloc(self->added, new_added_length * v1_hdrsize);
   428 		                                                 self->hdrsize);
   398 		if (!new_added)
   429 		if (!new_added)
   399 			return PyErr_NoMemory();
   430 			return PyErr_NoMemory();
   400 		self->added = new_added;
   431 		self->added = new_added;
   401 		self->added_length = new_added_length;
   432 		self->added_length = new_added_length;
   402 	}
   433 	}
   403 	rev = self->length + self->new_length;
   434 	rev = self->length + self->new_length;
   404 	data = self->added + v1_hdrsize * self->new_length++;
   435 	data = self->added + self->hdrsize * self->new_length++;
   405 	putbe32(offset_flags >> 32, data);
   436 	putbe32(offset_flags >> 32, data);
   406 	putbe32(offset_flags & 0xffffffffU, data + 4);
   437 	putbe32(offset_flags & 0xffffffffU, data + 4);
   407 	putbe32(comp_len, data + 8);
   438 	putbe32(comp_len, data + 8);
   408 	putbe32(uncomp_len, data + 12);
   439 	putbe32(uncomp_len, data + 12);
   409 	putbe32(base_rev, data + 16);
   440 	putbe32(base_rev, data + 16);
   410 	putbe32(link_rev, data + 20);
   441 	putbe32(link_rev, data + 20);
   411 	putbe32(parent_1, data + 24);
   442 	putbe32(parent_1, data + 24);
   412 	putbe32(parent_2, data + 28);
   443 	putbe32(parent_2, data + 28);
   413 	memcpy(data + 32, c_node_id, c_node_id_len);
   444 	memcpy(data + 32, c_node_id, c_node_id_len);
       
   445 	/* Padding since SHA-1 is only 20 bytes for now */
   414 	memset(data + 32 + c_node_id_len, 0, 32 - c_node_id_len);
   446 	memset(data + 32 + c_node_id_len, 0, 32 - c_node_id_len);
       
   447 	if (self->hdrsize != v1_hdrsize) {
       
   448 		putbe64(sidedata_offset, data + 64);
       
   449 		putbe32(sidedata_comp_len, data + 72);
       
   450 		/* Zero padding up to the 96-byte entry size */
       
   451 		memset(data + 76, 0, self->hdrsize - 76);
       
   452 	}
   415 
   453 
   416 	if (self->ntinitialized)
   454 	if (self->ntinitialized)
   417 		nt_insert(&self->nt, c_node_id, rev);
   455 		nt_insert(&self->nt, c_node_id, rev);
   418 
   456 
   419 	Py_CLEAR(self->headrevs);
   457 	Py_CLEAR(self->headrevs);
  2561 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
  2599 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
  2562 {
  2600 {
  2563 	const char *data = (const char *)self->buf.buf;
  2601 	const char *data = (const char *)self->buf.buf;
  2564 	Py_ssize_t pos = 0;
  2602 	Py_ssize_t pos = 0;
  2565 	Py_ssize_t end = self->buf.len;
  2603 	Py_ssize_t end = self->buf.len;
  2566 	long incr = v1_hdrsize;
  2604 	long incr = self->hdrsize;
  2567 	Py_ssize_t len = 0;
  2605 	Py_ssize_t len = 0;
  2568 
  2606 
  2569 	while (pos + v1_hdrsize <= end && pos >= 0) {
  2607 	while (pos + self->hdrsize <= end && pos >= 0) {
  2570 		uint32_t comp_len;
  2608 		uint32_t comp_len, sidedata_comp_len = 0;
  2571 		/* 3rd element of header is length of compressed inline data */
  2609 		/* 3rd element of header is length of compressed inline data */
  2572 		comp_len = getbe32(data + pos + 8);
  2610 		comp_len = getbe32(data + pos + 8);
  2573 		incr = v1_hdrsize + comp_len;
  2611 		if (self->hdrsize == v2_hdrsize) {
       
  2612 			sidedata_comp_len = getbe32(data + pos + 72);
       
  2613 		}
       
  2614 		incr = self->hdrsize + comp_len + sidedata_comp_len;
  2574 		if (offsets)
  2615 		if (offsets)
  2575 			offsets[len] = data + pos;
  2616 			offsets[len] = data + pos;
  2576 		len++;
  2617 		len++;
  2577 		pos += incr;
  2618 		pos += incr;
  2578 	}
  2619 	}
  2584 	}
  2625 	}
  2585 
  2626 
  2586 	return len;
  2627 	return len;
  2587 }
  2628 }
  2588 
  2629 
  2589 static int index_init(indexObject *self, PyObject *args)
  2630 static int index_init(indexObject *self, PyObject *args, PyObject *kwargs)
  2590 {
  2631 {
  2591 	PyObject *data_obj, *inlined_obj;
  2632 	PyObject *data_obj, *inlined_obj, *revlogv2;
  2592 	Py_ssize_t size;
  2633 	Py_ssize_t size;
       
  2634 
       
  2635 	static char *kwlist[] = {"data", "inlined", "revlogv2", NULL};
  2593 
  2636 
  2594 	/* Initialize before argument-checking to avoid index_dealloc() crash.
  2637 	/* Initialize before argument-checking to avoid index_dealloc() crash.
  2595 	 */
  2638 	 */
  2596 	self->added = NULL;
  2639 	self->added = NULL;
  2597 	self->new_length = 0;
  2640 	self->new_length = 0;
  2604 	self->ntinitialized = 0;
  2647 	self->ntinitialized = 0;
  2605 	self->offsets = NULL;
  2648 	self->offsets = NULL;
  2606 	self->nodelen = 20;
  2649 	self->nodelen = 20;
  2607 	self->nullentry = NULL;
  2650 	self->nullentry = NULL;
  2608 
  2651 
  2609 	if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
  2652 	revlogv2 = NULL;
       
  2653 	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|O", kwlist,
       
  2654 	                                 &data_obj, &inlined_obj, &revlogv2))
  2610 		return -1;
  2655 		return -1;
  2611 	if (!PyObject_CheckBuffer(data_obj)) {
  2656 	if (!PyObject_CheckBuffer(data_obj)) {
  2612 		PyErr_SetString(PyExc_TypeError,
  2657 		PyErr_SetString(PyExc_TypeError,
  2613 		                "data does not support buffer interface");
  2658 		                "data does not support buffer interface");
  2614 		return -1;
  2659 		return -1;
  2616 	if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
  2661 	if (self->nodelen < 20 || self->nodelen > (Py_ssize_t)sizeof(nullid)) {
  2617 		PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
  2662 		PyErr_SetString(PyExc_RuntimeError, "unsupported node size");
  2618 		return -1;
  2663 		return -1;
  2619 	}
  2664 	}
  2620 
  2665 
  2621 	self->nullentry = Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0,
  2666 	if (revlogv2 && PyObject_IsTrue(revlogv2)) {
  2622 	                                -1, -1, -1, -1, nullid, self->nodelen);
  2667 		self->hdrsize = v2_hdrsize;
       
  2668 	} else {
       
  2669 		self->hdrsize = v1_hdrsize;
       
  2670 	}
       
  2671 
       
  2672 	if (self->hdrsize == v1_hdrsize) {
       
  2673 		self->nullentry =
       
  2674 		    Py_BuildValue(PY23("iiiiiiis#", "iiiiiiiy#"), 0, 0, 0, -1,
       
  2675 		                  -1, -1, -1, nullid, self->nodelen);
       
  2676 	} else {
       
  2677 		self->nullentry = Py_BuildValue(
       
  2678 		    PY23("iiiiiiis#ii", "iiiiiiiy#ii"), 0, 0, 0, -1, -1, -1,
       
  2679 		    -1, nullid, self->nodelen, 0, 0);
       
  2680 	}
       
  2681 
  2623 	if (!self->nullentry)
  2682 	if (!self->nullentry)
  2624 		return -1;
  2683 		return -1;
  2625 	PyObject_GC_UnTrack(self->nullentry);
  2684 	PyObject_GC_UnTrack(self->nullentry);
  2626 
  2685 
  2627 	if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
  2686 	if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
  2639 		Py_ssize_t len = inline_scan(self, NULL);
  2698 		Py_ssize_t len = inline_scan(self, NULL);
  2640 		if (len == -1)
  2699 		if (len == -1)
  2641 			goto bail;
  2700 			goto bail;
  2642 		self->length = len;
  2701 		self->length = len;
  2643 	} else {
  2702 	} else {
  2644 		if (size % v1_hdrsize) {
  2703 		if (size % self->hdrsize) {
  2645 			PyErr_SetString(PyExc_ValueError, "corrupt index file");
  2704 			PyErr_SetString(PyExc_ValueError, "corrupt index file");
  2646 			goto bail;
  2705 			goto bail;
  2647 		}
  2706 		}
  2648 		self->length = size / v1_hdrsize;
  2707 		self->length = size / self->hdrsize;
  2649 	}
  2708 	}
  2650 
  2709 
  2651 	return 0;
  2710 	return 0;
  2652 bail:
  2711 bail:
  2653 	return -1;
  2712 	return -1;
  2795     (initproc)index_init,          /* tp_init */
  2854     (initproc)index_init,          /* tp_init */
  2796     0,                             /* tp_alloc */
  2855     0,                             /* tp_alloc */
  2797 };
  2856 };
  2798 
  2857 
  2799 /*
  2858 /*
  2800  * returns a tuple of the form (index, index, cache) with elements as
  2859  * returns a tuple of the form (index, cache) with elements as
  2801  * follows:
  2860  * follows:
  2802  *
  2861  *
  2803  * index: an index object that lazily parses RevlogNG records
  2862  * index: an index object that lazily parses Revlog (v1 or v2) records
  2804  * cache: if data is inlined, a tuple (0, index_file_content), else None
  2863  * cache: if data is inlined, a tuple (0, index_file_content), else None
  2805  *        index_file_content could be a string, or a buffer
  2864  *        index_file_content could be a string, or a buffer
  2806  *
  2865  *
  2807  * added complications are for backwards compatibility
  2866  * added complications are for backwards compatibility
  2808  */
  2867  */
  2809 PyObject *parse_index2(PyObject *self, PyObject *args)
  2868 PyObject *parse_index2(PyObject *self, PyObject *args, PyObject *kwargs)
  2810 {
  2869 {
  2811 	PyObject *cache = NULL;
  2870 	PyObject *cache = NULL;
  2812 	indexObject *idx;
  2871 	indexObject *idx;
  2813 	int ret;
  2872 	int ret;
  2814 
  2873 
  2815 	idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
  2874 	idx = PyObject_New(indexObject, &HgRevlogIndex_Type);
  2816 	if (idx == NULL)
  2875 	if (idx == NULL)
  2817 		goto bail;
  2876 		goto bail;
  2818 
  2877 
  2819 	ret = index_init(idx, args);
  2878 	ret = index_init(idx, args, kwargs);
  2820 	if (ret == -1)
  2879 	if (ret == -1)
  2821 		goto bail;
  2880 		goto bail;
  2822 
  2881 
  2823 	if (idx->inlined) {
  2882 	if (idx->inlined) {
  2824 		cache = Py_BuildValue("iO", 0, idx->data);
  2883 		cache = Py_BuildValue("iO", 0, idx->data);