revlog: have an explicit "pack_header" method
author Pierre-Yves David <pierre-yves.david@octobus.net>
Thu, 01 Apr 2021 11:31:54 +0200
changeset 47037 d57386e5c80e
parent 47036 5e64c93d5f94
child 47038 724db234b790
revlog: have an explicit "pack_header" method Having to pass the version header when retrieving the binary version of every single entry is a bit silly. So we extract that special logic into its own method. This also prepares the move to a newer revlog format that does not store the header within an actual entry… Differential Revision: https://phab.mercurial-scm.org/D10510
mercurial/cext/revlog.c
mercurial/pure/parsers.py
mercurial/revlog.py
rust/hg-cpython/src/revlog.rs
--- a/mercurial/cext/revlog.c	Sat May 01 14:47:39 2021 +0200
+++ b/mercurial/cext/revlog.c	Thu Apr 01 11:31:54 2021 +0200
@@ -343,18 +343,28 @@
 	}
 }
 /*
+ * Pack header information in binary
+ */
+static PyObject *index_pack_header(indexObject *self, PyObject *args)
+{
+	int header;
+	char out[4];
+	if (!PyArg_ParseTuple(args, "I", &header)) {
+		return NULL;
+	}
+	putbe32(header, out);
+	return PyBytes_FromStringAndSize(out, 4);
+}
+/*
  * Return the raw binary string representing a revision
  */
-static PyObject *index_entry_binary(indexObject *self, PyObject *args)
+static PyObject *index_entry_binary(indexObject *self, PyObject *value)
 {
 	long rev;
-	int header;
 	const char *data;
-	char entry[v2_hdrsize];
-
 	Py_ssize_t length = index_length(self);
 
-	if (!PyArg_ParseTuple(args, "lI", &rev, &header)) {
+	if (!pylong_to_long(value, &rev)) {
 		return NULL;
 	}
 	if (rev < 0 || rev >= length) {
@@ -367,10 +377,8 @@
 	if (data == NULL)
 		return NULL;
 	if (rev == 0) {
-		/* put the header at the start of the first entry */
-		memcpy(entry, data, self->hdrsize);
-		putbe32(header, entry);
-		return PyBytes_FromStringAndSize(entry, self->hdrsize);
+		/* the header is eating the start of the first entry */
+		return PyBytes_FromStringAndSize(data + 4, self->hdrsize - 4);
 	}
 	return PyBytes_FromStringAndSize(data, self->hdrsize);
 }
@@ -2891,8 +2899,10 @@
     {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
      "find length of shortest hex nodeid of a binary ID"},
     {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
-    {"entry_binary", (PyCFunction)index_entry_binary, METH_VARARGS,
+    {"entry_binary", (PyCFunction)index_entry_binary, METH_O,
      "return an entry in binary form"},
+    {"pack_header", (PyCFunction)index_pack_header, METH_VARARGS,
+     "pack the revlog header information into binary"},
     {NULL} /* Sentinel */
 };
 
--- a/mercurial/pure/parsers.py	Sat May 01 14:47:39 2021 +0200
+++ b/mercurial/pure/parsers.py	Thu Apr 01 11:31:54 2021 +0200
@@ -127,14 +127,17 @@
             r = (offset_type(0, gettype(r[0])),) + r[1:]
         return r
 
-    def entry_binary(self, rev, header):
+    def pack_header(self, header):
+        """pack header information as binary"""
+        v_fmt = revlog_constants.INDEX_HEADER
+        return v_fmt.pack(header)
+
+    def entry_binary(self, rev):
         """return the raw binary string representing a revision"""
         entry = self[rev]
         p = revlog_constants.INDEX_ENTRY_V1.pack(*entry)
         if rev == 0:
-            v_fmt = revlog_constants.INDEX_HEADER
-            v_bin = v_fmt.pack(header)
-            p = v_bin + p[v_fmt.size :]
+            p = p[revlog_constants.INDEX_HEADER.size :]
         return p
 
 
@@ -286,14 +289,12 @@
             msg = b"cannot rewrite entries outside of this transaction"
             raise KeyError(msg)
 
-    def entry_binary(self, rev, header):
+    def entry_binary(self, rev):
         """return the raw binary string representing a revision"""
         entry = self[rev]
         p = revlog_constants.INDEX_ENTRY_V2.pack(*entry)
         if rev == 0:
-            v_fmt = revlog_constants.INDEX_HEADER
-            v_bin = v_fmt.pack(header)
-            p = v_bin + p[v_fmt.size :]
+            p = p[revlog_constants.INDEX_HEADER.size :]
         return p
 
 
--- a/mercurial/revlog.py	Sat May 01 14:47:39 2021 +0200
+++ b/mercurial/revlog.py	Thu Apr 01 11:31:54 2021 +0200
@@ -266,7 +266,7 @@
             return (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid)
         return list.__getitem__(self, i)
 
-    def entry_binary(self, rev, header):
+    def entry_binary(self, rev):
         """return the raw binary string representing a revision"""
         entry = self[rev]
         if gettype(entry[0]):
@@ -284,6 +284,10 @@
         )
         return INDEX_ENTRY_V0.pack(*e2)
 
+    def pack_header(self, header):
+        """Pack header information in binary"""
+        return b''
+
 
 def parse_index_v0(data, inline):
     s = INDEX_ENTRY_V0.size
@@ -2041,7 +2045,10 @@
             self.version &= ~FLAG_INLINE_DATA
             self._inline = False
             for i in self:
-                e = self.index.entry_binary(i, self.version)
+                e = self.index.entry_binary(i)
+                if i == 0:
+                    header = self.index.pack_header(self.version)
+                    e = header + e
                 fp.write(e)
 
             # the temp file replace the real index when we exit the context
@@ -2363,7 +2370,10 @@
             e = e[:8]
 
         self.index.append(e)
-        entry = self.index.entry_binary(curr, self.version)
+        entry = self.index.entry_binary(curr)
+        if curr == 0:
+            header = self.index.pack_header(self.version)
+            entry = header + entry
         self._writeentry(
             transaction,
             ifh,
@@ -3216,5 +3226,8 @@
             for i, entry in enumerate(new_entries):
                 rev = startrev + i
                 self.index.replace_sidedata_info(rev, entry[8], entry[9])
-                packed = self.index.entry_binary(rev, self.version)
+                packed = self.index.entry_binary(rev)
+                if rev == 0:
+                    header = self.index.pack_header(self.version)
+                    packed = header + packed
                 fp.write(packed)
--- a/rust/hg-cpython/src/revlog.rs	Sat May 01 14:47:39 2021 +0200
+++ b/rust/hg-cpython/src/revlog.rs	Thu Apr 01 11:31:54 2021 +0200
@@ -177,6 +177,11 @@
         self.call_cindex(py, "entry_binary", args, kw)
     }
 
+    /// return a binary packed version of the header
+    def pack_header(&self, *args, **kw) -> PyResult<PyObject> {
+        self.call_cindex(py, "pack_header", args, kw)
+    }
+
     /// get an index entry
     def get(&self, *args, **kw) -> PyResult<PyObject> {
         self.call_cindex(py, "get", args, kw)