revlog: implement changelogv2 packing and unpacking in C
author: pacien <pacien.trangirard@pacien.net>
Mon, 14 Feb 2022 12:34:02 +0100
changeset 48768 7dd5a2c0116a
parent 48767 654baf1faa52
child 48769 1bb62821f080
revlog: implement changelogv2 packing and unpacking in C

This introduces a C implementation of changelogv2 records packing and unpacking operations matching the pure Python counterpart, similarly to what we already have for revlogv1 and revlogv2.

This is also necessary to access changelogv2 record fields from future Rust code without going through the Python part, which would annihilate any performance benefit.

Differential Revision: https://phab.mercurial-scm.org/D12178
mercurial/cext/revlog.c
--- a/mercurial/cext/revlog.c	Thu Feb 10 16:20:14 2022 +0100
+++ b/mercurial/cext/revlog.c	Mon Feb 14 12:34:02 2022 +0100
@@ -133,8 +133,9 @@
 /* A Revlogv2 index entry is 96 bytes long. */
 static const long v2_entry_size = 96;
 
-static const long format_v1 = 1; /* Internal only, could be any number */
-static const long format_v2 = 2; /* Internal only, could be any number */
+static const long format_v1 = 1;  /* Internal only, could be any number */
+static const long format_v2 = 2;  /* Internal only, could be any number */
+static const long format_cl2 = 3; /* Internal only, could be any number */
 
 static const long entry_v1_offset_high = 0;
 static const long entry_v1_offset_offset_flags = 4;
@@ -160,6 +161,19 @@
 static const long entry_v2_offset_all_comp_mode = 76;
 /* next free offset: 77 */
 
+static const long entry_cl2_offset_high = 0;
+static const long entry_cl2_offset_offset_flags = 4;
+static const long entry_cl2_offset_comp_len = 8;
+static const long entry_cl2_offset_uncomp_len = 12;
+static const long entry_cl2_offset_parent_1 = 16;
+static const long entry_cl2_offset_parent_2 = 20;
+static const long entry_cl2_offset_node_id = 24;
+static const long entry_cl2_offset_sidedata_offset = 56;
+static const long entry_cl2_offset_sidedata_comp_len = 64;
+static const long entry_cl2_offset_all_comp_mode = 68;
+static const long entry_cl2_offset_rank = 69;
+/* next free offset: 73 */
+
 static const char comp_mode_inline = 2;
 static const char rank_unknown = -1;
 
@@ -236,6 +250,9 @@
 	} else if (self->format_version == format_v2) {
 		ps[0] = getbe32(data + entry_v2_offset_parent_1);
 		ps[1] = getbe32(data + entry_v2_offset_parent_2);
+	} else if (self->format_version == format_cl2) {
+		ps[0] = getbe32(data + entry_cl2_offset_parent_1);
+		ps[1] = getbe32(data + entry_cl2_offset_parent_2);
 	} else {
 		raise_revlog_error();
 		return -1;
@@ -307,6 +324,10 @@
 			    getbe32(data + entry_v2_offset_high);
 			offset |= ((uint64_t)offset_high) << 32;
 		}
+	} else if (self->format_version == format_cl2) {
+		uint32_t offset_high = getbe32(data + entry_cl2_offset_high);
+		offset = getbe32(data + entry_cl2_offset_offset_flags);
+		offset |= ((uint64_t)offset_high) << 32;
 	} else {
 		raise_revlog_error();
 		return -1;
@@ -329,6 +350,8 @@
 		tmp = (int)getbe32(data + entry_v1_offset_comp_len);
 	} else if (self->format_version == format_v2) {
 		tmp = (int)getbe32(data + entry_v2_offset_comp_len);
+	} else if (self->format_version == format_cl2) {
+		tmp = (int)getbe32(data + entry_cl2_offset_comp_len);
 	} else {
 		raise_revlog_error();
 		return -1;
@@ -357,7 +380,7 @@
 {
 	uint64_t offset_flags, sidedata_offset;
 	int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
-	    sidedata_comp_len;
+	    sidedata_comp_len, rank = rank_unknown;
 	char data_comp_mode, sidedata_comp_mode;
 	const char *c_node_id;
 	const char *data;
@@ -434,6 +457,30 @@
 		data_comp_mode = data[entry_v2_offset_all_comp_mode] & 3;
 		sidedata_comp_mode =
 		    ((data[entry_v2_offset_all_comp_mode] >> 2) & 3);
+	} else if (self->format_version == format_cl2) {
+		uint32_t offset_high = getbe32(data + entry_cl2_offset_high);
+		offset_flags = getbe32(data + entry_cl2_offset_offset_flags);
+		offset_flags |= ((uint64_t)offset_high) << 32;
+		comp_len = getbe32(data + entry_cl2_offset_comp_len);
+		uncomp_len = getbe32(data + entry_cl2_offset_uncomp_len);
+		/* base_rev and link_rev are not stored in changelogv2, but are
+		 still used by some functions shared with the other revlogs.
+		 They are supposed to contain links to other revisions,
+		 but they always point to themselves in the case of a changelog.
+		*/
+		base_rev = pos;
+		link_rev = pos;
+		parent_1 = getbe32(data + entry_cl2_offset_parent_1);
+		parent_2 = getbe32(data + entry_cl2_offset_parent_2);
+		c_node_id = data + entry_cl2_offset_node_id;
+		sidedata_offset =
+		    getbe64(data + entry_cl2_offset_sidedata_offset);
+		sidedata_comp_len =
+		    getbe32(data + entry_cl2_offset_sidedata_comp_len);
+		data_comp_mode = data[entry_cl2_offset_all_comp_mode] & 3;
+		sidedata_comp_mode =
+		    ((data[entry_cl2_offset_all_comp_mode] >> 2) & 3);
+		rank = getbe32(data + entry_cl2_offset_rank);
 	} else {
 		raise_revlog_error();
 		return NULL;
@@ -442,7 +489,7 @@
 	return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
 	                     base_rev, link_rev, parent_1, parent_2, c_node_id,
 	                     self->nodelen, sidedata_offset, sidedata_comp_len,
-	                     data_comp_mode, sidedata_comp_mode, rank_unknown);
+	                     data_comp_mode, sidedata_comp_mode, rank);
 }
 /*
  * Pack header information in binary
@@ -514,6 +561,8 @@
 		node_id = data + entry_v1_offset_node_id;
 	} else if (self->format_version == format_v2) {
 		node_id = data + entry_v2_offset_node_id;
+	} else if (self->format_version == format_cl2) {
+		node_id = data + entry_cl2_offset_node_id;
 	} else {
 		raise_revlog_error();
 		return NULL;
@@ -636,6 +685,24 @@
 		comp_field = data_comp_mode & 3;
 		comp_field = comp_field | (sidedata_comp_mode & 3) << 2;
 		data[entry_v2_offset_all_comp_mode] = comp_field;
+	} else if (self->format_version == format_cl2) {
+		putbe32(offset_flags >> 32, data + entry_cl2_offset_high);
+		putbe32(offset_flags & 0xffffffffU,
+		        data + entry_cl2_offset_offset_flags);
+		putbe32(comp_len, data + entry_cl2_offset_comp_len);
+		putbe32(uncomp_len, data + entry_cl2_offset_uncomp_len);
+		putbe32(parent_1, data + entry_cl2_offset_parent_1);
+		putbe32(parent_2, data + entry_cl2_offset_parent_2);
+		memcpy(data + entry_cl2_offset_node_id, c_node_id,
+		       c_node_id_len);
+		putbe64(sidedata_offset,
+		        data + entry_cl2_offset_sidedata_offset);
+		putbe32(sidedata_comp_len,
+		        data + entry_cl2_offset_sidedata_comp_len);
+		comp_field = data_comp_mode & 3;
+		comp_field = comp_field | (sidedata_comp_mode & 3) << 2;
+		data[entry_cl2_offset_all_comp_mode] = comp_field;
+		putbe32(rank, data + entry_cl2_offset_rank);
 	} else {
 		raise_revlog_error();
 		return NULL;
@@ -693,12 +760,28 @@
 	/* Find the newly added node, offset from the "already on-disk" length
 	 */
 	data = self->added + self->entry_size * (rev - self->length);
-	putbe64(offset_flags, data + entry_v2_offset_high);
-	putbe64(sidedata_offset, data + entry_v2_offset_sidedata_offset);
-	putbe32(sidedata_comp_len, data + entry_v2_offset_sidedata_comp_len);
-	data[entry_v2_offset_all_comp_mode] =
-	    (data[entry_v2_offset_all_comp_mode] & ~(3 << 2)) |
-	    ((comp_mode & 3) << 2);
+	if (self->format_version == format_v2) {
+		putbe64(offset_flags, data + entry_v2_offset_high);
+		putbe64(sidedata_offset,
+		        data + entry_v2_offset_sidedata_offset);
+		putbe32(sidedata_comp_len,
+		        data + entry_v2_offset_sidedata_comp_len);
+		data[entry_v2_offset_all_comp_mode] =
+		    (data[entry_v2_offset_all_comp_mode] & ~(3 << 2)) |
+		    ((comp_mode & 3) << 2);
+	} else if (self->format_version == format_cl2) {
+		putbe64(offset_flags, data + entry_cl2_offset_high);
+		putbe64(sidedata_offset,
+		        data + entry_cl2_offset_sidedata_offset);
+		putbe32(sidedata_comp_len,
+		        data + entry_cl2_offset_sidedata_comp_len);
+		data[entry_cl2_offset_all_comp_mode] =
+		    (data[entry_cl2_offset_all_comp_mode] & ~(3 << 2)) |
+		    ((comp_mode & 3) << 2);
+	} else {
+		raise_revlog_error();
+		return NULL;
+	}
 
 	Py_RETURN_NONE;
 }
@@ -1246,6 +1329,8 @@
 		result = getbe32(data + entry_v1_offset_base_rev);
 	} else if (self->format_version == format_v2) {
 		result = getbe32(data + entry_v2_offset_base_rev);
+	} else if (self->format_version == format_cl2) {
+		return rev;
 	} else {
 		raise_revlog_error();
 		return -1;