revlog: add an `entry_binary` method on index
author Pierre-Yves David <pierre-yves.david@octobus.net>
Thu, 08 Apr 2021 00:01:11 +0200
changeset 47034 0d8ff1f4ab0c
parent 47033 824ee4aaa09b
child 47035 4f2b5f9d8cc4
revlog: add an `entry_binary` method on index The revlog index is already responsible for unpacking the binary entries, so it would be simpler to make it responsible for packing them as well. In practice the C version of the index is already doing this internally. We introduce an "entry_binary" method that returns the binary version of an existing revision. The method currently also needs to take the revlog header to deal with the "first revision" special case. We will introduce further refactoring in a later changeset to split that logic out. Differential Revision: https://phab.mercurial-scm.org/D10508
mercurial/cext/revlog.c
mercurial/pure/parsers.py
mercurial/revlog.py
rust/hg-cpython/src/revlog.rs
--- a/mercurial/cext/revlog.c	Thu Apr 15 12:08:34 2021 +0200
+++ b/mercurial/cext/revlog.c	Thu Apr 08 00:01:11 2021 +0200
@@ -342,6 +342,38 @@
 		                     sidedata_offset, sidedata_comp_len);
 	}
 }
+/*
+ * Return the raw binary string representing a revision. `args` is
+ * (rev, header); for rev 0, putbe32() writes `header` over the entry start.
+ */
+static PyObject *index_entry_binary(indexObject *self, PyObject *args)
+{
+	long rev;
+	unsigned int header; /* the "I" format unit stores an unsigned int */
+	const char *data;
+	char entry[v2_hdrsize];
+	Py_ssize_t length = index_length(self);
+
+	if (!PyArg_ParseTuple(args, "lI", &rev, &header)) {
+		return NULL;
+	}
+	if (rev < 0 || rev >= length) {
+		PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld",
+		             rev);
+		return NULL;
+	}
+
+	data = index_deref(self, rev);
+	if (data == NULL)
+		return NULL;
+	if (rev == 0) {
+		// work on a copy so the index data itself is left untouched
+		memcpy(entry, data, self->hdrsize);
+		putbe32(header, entry);
+		return PyBytes_FromStringAndSize(entry, self->hdrsize);
+	}
+	return PyBytes_FromStringAndSize(data, self->hdrsize);
+}
 
 /*
  * Return the hash of node corresponding to the given rev.
@@ -2859,6 +2891,8 @@
     {"shortest", (PyCFunction)index_shortest, METH_VARARGS,
      "find length of shortest hex nodeid of a binary ID"},
     {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"},
+    {"entry_binary", (PyCFunction)index_entry_binary, METH_VARARGS,
+     "return an entry in binary form"},
     {NULL} /* Sentinel */
 };
 
--- a/mercurial/pure/parsers.py	Thu Apr 15 12:08:34 2021 +0200
+++ b/mercurial/pure/parsers.py	Thu Apr 08 00:01:11 2021 +0200
@@ -127,10 +127,24 @@
             r = (offset_type(0, gettype(r[0])),) + r[1:]
         return r
 
+    def entry_binary(self, rev, header):
+        """return the raw binary string representing a revision"""
+        entry = self[rev]
+        p = revlog_constants.INDEX_ENTRY_V1.pack(*entry)
+        if rev == 0:
+            v_fmt = revlog_constants.INDEX_HEADER
+            v_bin = v_fmt.pack(header)
+            p = v_bin + p[v_fmt.size :]
+        return p
+
 
 class IndexObject(BaseIndexObject):
     def __init__(self, data):
-        assert len(data) % self.entry_size == 0
+        assert len(data) % self.entry_size == 0, (
+            len(data),
+            self.entry_size,
+            len(data) % self.entry_size,
+        )
         self._data = data
         self._lgt = len(data) // self.entry_size
         self._extra = []
@@ -272,6 +286,16 @@
             msg = b"cannot rewrite entries outside of this transaction"
             raise KeyError(msg)
 
+    def entry_binary(self, rev, header):
+        """return the raw binary string representing a revision"""
+        entry = self[rev]
+        p = revlog_constants.INDEX_ENTRY_V2.pack(*entry)
+        if rev == 0:
+            v_fmt = revlog_constants.INDEX_HEADER
+            v_bin = v_fmt.pack(header)
+            p = v_bin + p[v_fmt.size :]
+        return p
+
 
 class IndexObject2(Index2Mixin, IndexObject):
     pass
--- a/mercurial/revlog.py	Thu Apr 15 12:08:34 2021 +0200
+++ b/mercurial/revlog.py	Thu Apr 08 00:01:11 2021 +0200
@@ -268,6 +268,24 @@
             return (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid)
         return list.__getitem__(self, i)
 
+    def entry_binary(self, rev, header):
+        """return the raw binary string representing a revision"""
+        entry = self[rev]
+        if gettype(entry[0]):
+            raise error.RevlogError(
+                _(b'index entry flags need revlog version 1')
+            )
+        e2 = (
+            getoffset(entry[0]),
+            entry[1],
+            entry[3],
+            entry[4],
+            self[entry[5]][7],
+            self[entry[6]][7],
+            entry[7],
+        )
+        return INDEX_ENTRY_V0.pack(*e2)
+
 
 class revlogoldio(object):
     def parseindex(self, data, inline):
@@ -298,29 +316,6 @@
         index = revlogoldindex(index)
         return index, None
 
-    def packentry(self, entry, node, version, rev):
-        """return the binary representation of an entry
-
-        entry:   a tuple containing all the values (see index.__getitem__)
-        node:    a callback to convert a revision to nodeid
-        version: the changelog version
-        rev:     the revision number
-        """
-        if gettype(entry[0]):
-            raise error.RevlogError(
-                _(b'index entry flags need revlog version 1')
-            )
-        e2 = (
-            getoffset(entry[0]),
-            entry[1],
-            entry[3],
-            entry[4],
-            node(entry[5]),
-            node(entry[6]),
-            entry[7],
-        )
-        return INDEX_ENTRY_V0.pack(*e2)
-
 
 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
 # signed integer)
@@ -333,24 +328,12 @@
         index, cache = parsers.parse_index2(data, inline)
         return index, cache
 
-    def packentry(self, entry, node, version, rev):
-        p = INDEX_ENTRY_V1.pack(*entry)
-        if rev == 0:
-            p = INDEX_HEADER.pack(version) + p[4:]
-        return p
-
 
 class revlogv2io(object):
     def parseindex(self, data, inline):
         index, cache = parsers.parse_index2(data, inline, revlogv2=True)
         return index, cache
 
-    def packentry(self, entry, node, version, rev):
-        p = INDEX_ENTRY_V2.pack(*entry)
-        if rev == 0:
-            p = INDEX_HEADER.pack(version) + p[4:]
-        return p
-
 
 NodemapRevlogIO = None
 
@@ -2068,7 +2051,7 @@
             self._inline = False
             io = self._io
             for i in self:
-                e = io.packentry(self.index[i], self.node, self.version, i)
+                e = self.index.entry_binary(i, self.version)
                 fp.write(e)
 
             # the temp file replace the real index when we exit the context
@@ -2390,7 +2373,7 @@
             e = e[:8]
 
         self.index.append(e)
-        entry = self._io.packentry(e, self.node, self.version, curr)
+        entry = self.index.entry_binary(curr, self.version)
         self._writeentry(
             transaction,
             ifh,
@@ -3243,5 +3226,5 @@
             for i, entry in enumerate(new_entries):
                 rev = startrev + i
                 self.index.replace_sidedata_info(rev, entry[8], entry[9])
-                packed = self._io.packentry(entry, self.node, self.version, rev)
+                packed = self.index.entry_binary(rev, self.version)
                 fp.write(packed)
--- a/rust/hg-cpython/src/revlog.rs	Thu Apr 15 12:08:34 2021 +0200
+++ b/rust/hg-cpython/src/revlog.rs	Thu Apr 08 00:01:11 2021 +0200
@@ -172,6 +172,11 @@
         self.call_cindex(py, "clearcaches", args, kw)
     }
 
+    /// return the raw binary string of a revision (forwarded via `call_cindex`)
+    def entry_binary(&self, *args, **kw) -> PyResult<PyObject> {
+        self.call_cindex(py, "entry_binary", args, kw)
+    }
+
     /// get an index entry
     def get(&self, *args, **kw) -> PyResult<PyObject> {
         self.call_cindex(py, "get", args, kw)