revlog: introduce a compression mode for sidedata in the revlog index
author Pierre-Yves David <pierre-yves.david@octobus.net>
Mon, 03 May 2021 21:34:02 +0200
changeset 47256 2b69555e4875
parent 47255 ff9fd7107d11
child 47257 87d057137f82
revlog: introduce a compression mode for sidedata in the revlog index
We will use this for the compression of the sidedata payload.
Differential Revision: https://phab.mercurial-scm.org/D10653
mercurial/bundlerepo.py
mercurial/cext/revlog.c
mercurial/pure/parsers.py
mercurial/revlog.py
mercurial/revlogutils/revlogv0.py
mercurial/unionrepo.py
tests/test-parseindex2.py
--- a/mercurial/bundlerepo.py	Mon May 03 21:13:24 2021 +0200
+++ b/mercurial/bundlerepo.py	Mon May 03 21:34:02 2021 +0200
@@ -106,6 +106,7 @@
                 0,
                 0,
                 revlog_constants.COMP_MODE_INLINE,
+                revlog_constants.COMP_MODE_INLINE,
             )
             self.index.append(e)
             self.bundlerevs.add(n)
--- a/mercurial/cext/revlog.c	Mon May 03 21:13:24 2021 +0200
+++ b/mercurial/cext/revlog.c	Mon May 03 21:34:02 2021 +0200
@@ -118,9 +118,9 @@
 static int index_find_node(indexObject *self, const char *node);
 
 #if LONG_MAX == 0x7fffffffL
-static const char *const tuple_format = PY23("Kiiiiiis#KiB", "Kiiiiiiy#KiB");
+static const char *const tuple_format = PY23("Kiiiiiis#KiBB", "Kiiiiiiy#KiBB");
 #else
-static const char *const tuple_format = PY23("kiiiiiis#kiB", "kiiiiiiy#kiB");
+static const char *const tuple_format = PY23("kiiiiiis#kiBB", "kiiiiiiy#kiBB");
 #endif
 
 /* A RevlogNG v1 index entry is 64 bytes long. */
@@ -296,7 +296,7 @@
 	uint64_t offset_flags, sidedata_offset;
 	int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2,
 	    sidedata_comp_len;
-	char data_comp_mode;
+	char data_comp_mode, sidedata_comp_mode;
 	const char *c_node_id;
 	const char *data;
 	Py_ssize_t length = index_length(self);
@@ -339,16 +339,18 @@
 		sidedata_offset = 0;
 		sidedata_comp_len = 0;
 		data_comp_mode = comp_mode_inline;
+		sidedata_comp_mode = comp_mode_inline;
 	} else {
 		sidedata_offset = getbe64(data + 64);
 		sidedata_comp_len = getbe32(data + 72);
-		data_comp_mode = data[76];
+		data_comp_mode = data[76] & 3;
+		sidedata_comp_mode = ((data[76] >> 2) & 3);
 	}
 
 	return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
 	                     base_rev, link_rev, parent_1, parent_2, c_node_id,
 	                     self->nodelen, sidedata_offset, sidedata_comp_len,
-	                     data_comp_mode);
+	                     data_comp_mode, sidedata_comp_mode);
 }
 /*
  * Pack header information in binary
@@ -449,16 +451,17 @@
 {
 	uint64_t offset_flags, sidedata_offset;
 	int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
-	char data_comp_mode;
+	char data_comp_mode, sidedata_comp_mode;
 	Py_ssize_t c_node_id_len, sidedata_comp_len;
 	const char *c_node_id;
+	char comp_field;
 	char *data;
 
 	if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len,
 	                      &uncomp_len, &base_rev, &link_rev, &parent_1,
 	                      &parent_2, &c_node_id, &c_node_id_len,
 	                      &sidedata_offset, &sidedata_comp_len,
-	                      &data_comp_mode)) {
+	                      &data_comp_mode, &sidedata_comp_mode)) {
 		PyErr_SetString(PyExc_TypeError, "11-tuple required");
 		return NULL;
 	}
@@ -467,12 +470,20 @@
 		PyErr_SetString(PyExc_TypeError, "invalid node");
 		return NULL;
 	}
-	if (self->format_version == format_v1 &&
-	    data_comp_mode != comp_mode_inline) {
-		PyErr_Format(PyExc_ValueError,
-		             "invalid data compression mode: %i",
-		             data_comp_mode);
-		return NULL;
+	if (self->format_version == format_v1) {
+
+		if (data_comp_mode != comp_mode_inline) {
+			PyErr_Format(PyExc_ValueError,
+			             "invalid data compression mode: %i",
+			             data_comp_mode);
+			return NULL;
+		}
+		if (sidedata_comp_mode != comp_mode_inline) {
+			PyErr_Format(PyExc_ValueError,
+			             "invalid sidedata compression mode: %i",
+			             sidedata_comp_mode);
+			return NULL;
+		}
 	}
 
 	if (self->new_length == self->added_length) {
@@ -501,7 +512,9 @@
 	if (self->format_version == format_v2) {
 		putbe64(sidedata_offset, data + 64);
 		putbe32(sidedata_comp_len, data + 72);
-		data[76] = (char)data_comp_mode;
+		comp_field = data_comp_mode & 3;
+		comp_field = comp_field | (sidedata_comp_mode & 3) << 2;
+		data[76] = comp_field;
 		/* Padding for 96 bytes alignment */
 		memset(data + 77, 0, self->entry_size - 77);
 	}
@@ -2777,9 +2790,9 @@
 		self->entry_size = v1_entry_size;
 	}
 
-	self->nullentry = Py_BuildValue(PY23("iiiiiiis#iiB", "iiiiiiiy#iiB"), 0,
-	                                0, 0, -1, -1, -1, -1, nullid,
-	                                self->nodelen, 0, 0, comp_mode_inline);
+	self->nullentry = Py_BuildValue(
+	    PY23("iiiiiiis#iiBB", "iiiiiiiy#iiBB"), 0, 0, 0, -1, -1, -1, -1,
+	    nullid, self->nodelen, 0, 0, comp_mode_inline, comp_mode_inline);
 
 	if (!self->nullentry)
 		return -1;
--- a/mercurial/pure/parsers.py	Mon May 03 21:13:24 2021 +0200
+++ b/mercurial/pure/parsers.py	Mon May 03 21:34:02 2021 +0200
@@ -66,6 +66,7 @@
         0,
         0,
         revlog_constants.COMP_MODE_INLINE,
+        revlog_constants.COMP_MODE_INLINE,
     )
 
     @util.propertycache
@@ -147,7 +148,12 @@
 
     def _unpack_entry(self, data):
         r = self.index_format.unpack(data)
-        r = r + (0, 0, revlog_constants.COMP_MODE_INLINE)
+        r = r + (
+            0,
+            0,
+            revlog_constants.COMP_MODE_INLINE,
+            revlog_constants.COMP_MODE_INLINE,
+        )
         return r
 
     def pack_header(self, header):
@@ -315,10 +321,19 @@
             self._extra[rev - self._lgt] = new
 
     def _unpack_entry(self, data):
-        return self.index_format.unpack(data)
+        data = self.index_format.unpack(data)
+        entry = data[:10]
+        data_comp = data[10] & 3
+        sidedata_comp = (data[10] & (3 << 2)) >> 2
+        return entry + (data_comp, sidedata_comp)
 
     def _pack_entry(self, entry):
-        return self.index_format.pack(*entry[:11])
+        data = entry[:10]
+        data_comp = entry[10] & 3
+        sidedata_comp = (entry[11] & 3) << 2
+        data += (data_comp | sidedata_comp,)
+
+        return self.index_format.pack(*data)
 
     def entry_binary(self, rev):
         """return the raw binary string representing a revision"""
--- a/mercurial/revlog.py	Mon May 03 21:13:24 2021 +0200
+++ b/mercurial/revlog.py	Mon May 03 21:34:02 2021 +0200
@@ -345,6 +345,9 @@
             (see "COMP_MODE_*" constants for details). For revlog version 0 and
             1 this will always be COMP_MODE_INLINE.
 
+    [11] side-data compression mode:
+            two bits that detail the way the sidedata chunk is compressed on disk.
+            (see "COMP_MODE_*" constants for details)
     """
 
     _flagserrorclass = error.RevlogError
@@ -2517,7 +2520,9 @@
                 compression_mode = COMP_MODE_PLAIN
                 deltainfo = deltautil.drop_u_compression(deltainfo)
 
+        sidedata_compression_mode = COMP_MODE_INLINE
         if sidedata and self.hassidedata:
+            sidedata_compression_mode = COMP_MODE_PLAIN
             serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
             sidedata_offset = offset + deltainfo.deltalen
         else:
@@ -2539,6 +2544,7 @@
             sidedata_offset,
             len(serialized_sidedata),
             compression_mode,
+            sidedata_compression_mode,
         )
 
         self.index.append(e)
--- a/mercurial/revlogutils/revlogv0.py	Mon May 03 21:13:24 2021 +0200
+++ b/mercurial/revlogutils/revlogv0.py	Mon May 03 21:34:02 2021 +0200
@@ -55,6 +55,7 @@
         0,
         0,
         COMP_MODE_INLINE,
+        COMP_MODE_INLINE,
     )
 
     @property
--- a/mercurial/unionrepo.py	Mon May 03 21:13:24 2021 +0200
+++ b/mercurial/unionrepo.py	Mon May 03 21:34:02 2021 +0200
@@ -70,6 +70,7 @@
                 _sdo,
                 _sds,
                 _dcm,
+                _sdcm,
             ) = rev
             flags = _start & 0xFFFF
 
@@ -105,6 +106,7 @@
                 0,  # sidedata offset
                 0,  # sidedata size
                 revlog_constants.COMP_MODE_INLINE,
+                revlog_constants.COMP_MODE_INLINE,
             )
             self.index.append(e)
             self.bundlerevs.add(n)
--- a/tests/test-parseindex2.py	Mon May 03 21:13:24 2021 +0200
+++ b/tests/test-parseindex2.py	Mon May 03 21:34:02 2021 +0200
@@ -52,7 +52,12 @@
         cache = (0, data)
         while off <= l:
             e = struct.unpack(indexformatng, data[off : off + s])
-            e = e + (0, 0, constants.COMP_MODE_INLINE)
+            e = e + (
+                0,
+                0,
+                constants.COMP_MODE_INLINE,
+                constants.COMP_MODE_INLINE,
+            )
             nodemap[e[7]] = n
             append(e)
             n += 1
@@ -62,7 +67,12 @@
     else:
         while off <= l:
             e = struct.unpack(indexformatng, data[off : off + s])
-            e = e + (0, 0, constants.COMP_MODE_INLINE)
+            e = e + (
+                0,
+                0,
+                constants.COMP_MODE_INLINE,
+                constants.COMP_MODE_INLINE,
+            )
             nodemap[e[7]] = n
             append(e)
             n += 1
@@ -257,6 +267,7 @@
             0,
             0,
             constants.COMP_MODE_INLINE,
+            constants.COMP_MODE_INLINE,
         )
         index, junk = parsers.parse_index2(data_inlined, True)
         got = index[-1]
@@ -291,6 +302,7 @@
                 0,
                 0,
                 constants.COMP_MODE_INLINE,
+                constants.COMP_MODE_INLINE,
             )
             index.append(e)