# HG changeset patch # User Pierre-Yves David # Date 1617269514 -7200 # Node ID d57386e5c80e7f08b7b111c4777a8575011779e4 # Parent 5e64c93d5f9407d0d29909c14bb3d39fb9817aca revlog: have an explicit "pack_header" method Having to pass the version header when retrieving the binary version of every single entry is a bit silly. So we extract that special logic into its own method. This also prepares the move to a newer revlog format that does not store the header within an actual entry… Differential Revision: https://phab.mercurial-scm.org/D10510 diff -r 5e64c93d5f94 -r d57386e5c80e mercurial/cext/revlog.c --- a/mercurial/cext/revlog.c Sat May 01 14:47:39 2021 +0200 +++ b/mercurial/cext/revlog.c Thu Apr 01 11:31:54 2021 +0200 @@ -343,18 +343,28 @@ } } /* + * Pack header information in binary + */ +static PyObject *index_pack_header(indexObject *self, PyObject *args) +{ + int header; + char out[4]; + if (!PyArg_ParseTuple(args, "I", &header)) { + return NULL; + } + putbe32(header, out); + return PyBytes_FromStringAndSize(out, 4); +} +/* * Return the raw binary string representing a revision */ -static PyObject *index_entry_binary(indexObject *self, PyObject *args) +static PyObject *index_entry_binary(indexObject *self, PyObject *value) { long rev; - int header; const char *data; - char entry[v2_hdrsize]; - Py_ssize_t length = index_length(self); - if (!PyArg_ParseTuple(args, "lI", &rev, &header)) { + if (!pylong_to_long(value, &rev)) { return NULL; } if (rev < 0 || rev >= length) { @@ -367,10 +377,8 @@ if (data == NULL) return NULL; if (rev == 0) { - /* put the header at the start of the first entry */ - memcpy(entry, data, self->hdrsize); - putbe32(header, entry); - return PyBytes_FromStringAndSize(entry, self->hdrsize); + /* the header is eating the start of the first entry */ + return PyBytes_FromStringAndSize(data + 4, self->hdrsize - 4); } return PyBytes_FromStringAndSize(data, self->hdrsize); } @@ -2891,8 +2899,10 @@ {"shortest", (PyCFunction)index_shortest, METH_VARARGS, "find 
length of shortest hex nodeid of a binary ID"}, {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"}, - {"entry_binary", (PyCFunction)index_entry_binary, METH_VARARGS, + {"entry_binary", (PyCFunction)index_entry_binary, METH_O, "return an entry in binary form"}, + {"pack_header", (PyCFunction)index_pack_header, METH_VARARGS, + "pack the revlog header information into binary"}, {NULL} /* Sentinel */ }; diff -r 5e64c93d5f94 -r d57386e5c80e mercurial/pure/parsers.py --- a/mercurial/pure/parsers.py Sat May 01 14:47:39 2021 +0200 +++ b/mercurial/pure/parsers.py Thu Apr 01 11:31:54 2021 +0200 @@ -127,14 +127,17 @@ r = (offset_type(0, gettype(r[0])),) + r[1:] return r - def entry_binary(self, rev, header): + def pack_header(self, header): + """pack header information as binary""" + v_fmt = revlog_constants.INDEX_HEADER + return v_fmt.pack(header) + + def entry_binary(self, rev): """return the raw binary string representing a revision""" entry = self[rev] p = revlog_constants.INDEX_ENTRY_V1.pack(*entry) if rev == 0: - v_fmt = revlog_constants.INDEX_HEADER - v_bin = v_fmt.pack(header) - p = v_bin + p[v_fmt.size :] + p = p[revlog_constants.INDEX_HEADER.size :] return p @@ -286,14 +289,12 @@ msg = b"cannot rewrite entries outside of this transaction" raise KeyError(msg) - def entry_binary(self, rev, header): + def entry_binary(self, rev): """return the raw binary string representing a revision""" entry = self[rev] p = revlog_constants.INDEX_ENTRY_V2.pack(*entry) if rev == 0: - v_fmt = revlog_constants.INDEX_HEADER - v_bin = v_fmt.pack(header) - p = v_bin + p[v_fmt.size :] + p = p[revlog_constants.INDEX_HEADER.size :] return p diff -r 5e64c93d5f94 -r d57386e5c80e mercurial/revlog.py --- a/mercurial/revlog.py Sat May 01 14:47:39 2021 +0200 +++ b/mercurial/revlog.py Thu Apr 01 11:31:54 2021 +0200 @@ -266,7 +266,7 @@ return (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid) return list.__getitem__(self, i) - def entry_binary(self, rev, header): + def 
entry_binary(self, rev): """return the raw binary string representing a revision""" entry = self[rev] if gettype(entry[0]): @@ -284,6 +284,10 @@ ) return INDEX_ENTRY_V0.pack(*e2) + def pack_header(self, header): + """Pack header information in binary""" + return b'' + def parse_index_v0(data, inline): s = INDEX_ENTRY_V0.size @@ -2041,7 +2045,10 @@ self.version &= ~FLAG_INLINE_DATA self._inline = False for i in self: - e = self.index.entry_binary(i, self.version) + e = self.index.entry_binary(i) + if i == 0: + header = self.index.pack_header(self.version) + e = header + e fp.write(e) # the temp file replace the real index when we exit the context @@ -2363,7 +2370,10 @@ e = e[:8] self.index.append(e) - entry = self.index.entry_binary(curr, self.version) + entry = self.index.entry_binary(curr) + if curr == 0: + header = self.index.pack_header(self.version) + entry = header + entry self._writeentry( transaction, ifh, @@ -3216,5 +3226,8 @@ for i, entry in enumerate(new_entries): rev = startrev + i self.index.replace_sidedata_info(rev, entry[8], entry[9]) - packed = self.index.entry_binary(rev, self.version) + packed = self.index.entry_binary(rev) + if rev == 0: + header = self.index.pack_header(self.version) + packed = header + packed fp.write(packed) diff -r 5e64c93d5f94 -r d57386e5c80e rust/hg-cpython/src/revlog.rs --- a/rust/hg-cpython/src/revlog.rs Sat May 01 14:47:39 2021 +0200 +++ b/rust/hg-cpython/src/revlog.rs Thu Apr 01 11:31:54 2021 +0200 @@ -177,6 +177,11 @@ self.call_cindex(py, "entry_binary", args, kw) } + /// return a binary packed version of the header + def pack_header(&self, *args, **kw) -> PyResult { + self.call_cindex(py, "pack_header", args, kw) + } + /// get an index entry def get(&self, *args, **kw) -> PyResult { self.call_cindex(py, "get", args, kw)