# HG changeset patch # User Pierre-Yves David # Date 1617832871 -7200 # Node ID 0d8ff1f4ab0ccf41a74908e0fb91fdbf8c2760e1 # Parent 824ee4aaa09b5cc1ae917be5f3ca4bc0b708c4a8 revlog: add an `entry_binary` method on index The revlog index is already responsible for unpacking the binary entries, so it would be simpler to make it responsible for packing them as well. In practice the C version of the index is already doing this internally. We introduce an "entry_binary" method that returns the binary version of an existing revision. The method currently needs to also take the revlog header to deal with the "first revision" special case. We will introduce further refactoring in a later changeset to split that logic out. Differential Revision: https://phab.mercurial-scm.org/D10508 diff -r 824ee4aaa09b -r 0d8ff1f4ab0c mercurial/cext/revlog.c --- a/mercurial/cext/revlog.c Thu Apr 15 12:08:34 2021 +0200 +++ b/mercurial/cext/revlog.c Thu Apr 08 00:01:11 2021 +0200 @@ -342,6 +342,38 @@ sidedata_offset, sidedata_comp_len); } } +/* + * Return the raw binary string representing a revision + */ +static PyObject *index_entry_binary(indexObject *self, PyObject *args) +{ + long rev; + int header; + const char *data; + char entry[v2_hdrsize]; + + Py_ssize_t length = index_length(self); + + if (!PyArg_ParseTuple(args, "lI", &rev, &header)) { + return NULL; + } + if (rev < 0 || rev >= length) { + PyErr_Format(PyExc_ValueError, "revlog index out of range: %ld", + rev); + return NULL; + }; + + data = index_deref(self, rev); + if (data == NULL) + return NULL; + if (rev == 0) { + // put the header at the start of the first entry + memcpy(entry, data, self->hdrsize); + putbe32(header, entry); + return PyBytes_FromStringAndSize(entry, self->hdrsize); + } + return PyBytes_FromStringAndSize(data, self->hdrsize); +} /* * Return the hash of node corresponding to the given rev. 
@@ -2859,6 +2891,8 @@ {"shortest", (PyCFunction)index_shortest, METH_VARARGS, "find length of shortest hex nodeid of a binary ID"}, {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"}, + {"entry_binary", (PyCFunction)index_entry_binary, METH_VARARGS, + "return an entry in binary form"}, {NULL} /* Sentinel */ }; diff -r 824ee4aaa09b -r 0d8ff1f4ab0c mercurial/pure/parsers.py --- a/mercurial/pure/parsers.py Thu Apr 15 12:08:34 2021 +0200 +++ b/mercurial/pure/parsers.py Thu Apr 08 00:01:11 2021 +0200 @@ -127,10 +127,24 @@ r = (offset_type(0, gettype(r[0])),) + r[1:] return r + def entry_binary(self, rev, header): + """return the raw binary string representing a revision""" + entry = self[rev] + p = revlog_constants.INDEX_ENTRY_V1.pack(*entry) + if rev == 0: + v_fmt = revlog_constants.INDEX_HEADER + v_bin = v_fmt.pack(header) + p = v_bin + p[v_fmt.size :] + return p + class IndexObject(BaseIndexObject): def __init__(self, data): - assert len(data) % self.entry_size == 0 + assert len(data) % self.entry_size == 0, ( + len(data), + self.entry_size, + len(data) % self.entry_size, + ) self._data = data self._lgt = len(data) // self.entry_size self._extra = [] @@ -272,6 +286,16 @@ msg = b"cannot rewrite entries outside of this transaction" raise KeyError(msg) + def entry_binary(self, rev, header): + """return the raw binary string representing a revision""" + entry = self[rev] + p = revlog_constants.INDEX_ENTRY_V2.pack(*entry) + if rev == 0: + v_fmt = revlog_constants.INDEX_HEADER + v_bin = v_fmt.pack(header) + p = v_bin + p[v_fmt.size :] + return p + class IndexObject2(Index2Mixin, IndexObject): pass diff -r 824ee4aaa09b -r 0d8ff1f4ab0c mercurial/revlog.py --- a/mercurial/revlog.py Thu Apr 15 12:08:34 2021 +0200 +++ b/mercurial/revlog.py Thu Apr 08 00:01:11 2021 +0200 @@ -268,6 +268,24 @@ return (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid) return list.__getitem__(self, i) + def entry_binary(self, rev, header): + """return the raw binary string 
representing a revision""" + entry = self[rev] + if gettype(entry[0]): + raise error.RevlogError( + _(b'index entry flags need revlog version 1') + ) + e2 = ( + getoffset(entry[0]), + entry[1], + entry[3], + entry[4], + self[entry[5]][7], + self[entry[6]][7], + entry[7], + ) + return INDEX_ENTRY_V0.pack(*e2) + class revlogoldio(object): def parseindex(self, data, inline): @@ -298,29 +316,6 @@ index = revlogoldindex(index) return index, None - def packentry(self, entry, node, version, rev): - """return the binary representation of an entry - - entry: a tuple containing all the values (see index.__getitem__) - node: a callback to convert a revision to nodeid - version: the changelog version - rev: the revision number - """ - if gettype(entry[0]): - raise error.RevlogError( - _(b'index entry flags need revlog version 1') - ) - e2 = ( - getoffset(entry[0]), - entry[1], - entry[3], - entry[4], - node(entry[5]), - node(entry[6]), - entry[7], - ) - return INDEX_ENTRY_V0.pack(*e2) - # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte # signed integer) @@ -333,24 +328,12 @@ index, cache = parsers.parse_index2(data, inline) return index, cache - def packentry(self, entry, node, version, rev): - p = INDEX_ENTRY_V1.pack(*entry) - if rev == 0: - p = INDEX_HEADER.pack(version) + p[4:] - return p - class revlogv2io(object): def parseindex(self, data, inline): index, cache = parsers.parse_index2(data, inline, revlogv2=True) return index, cache - def packentry(self, entry, node, version, rev): - p = INDEX_ENTRY_V2.pack(*entry) - if rev == 0: - p = INDEX_HEADER.pack(version) + p[4:] - return p - NodemapRevlogIO = None @@ -2068,7 +2051,7 @@ self._inline = False io = self._io for i in self: - e = io.packentry(self.index[i], self.node, self.version, i) + e = self.index.entry_binary(i, self.version) fp.write(e) # the temp file replace the real index when we exit the context @@ -2390,7 +2373,7 @@ e = e[:8] self.index.append(e) - entry = self._io.packentry(e, self.node, 
self.version, curr) + entry = self.index.entry_binary(curr, self.version) self._writeentry( transaction, ifh, @@ -3243,5 +3226,5 @@ for i, entry in enumerate(new_entries): rev = startrev + i self.index.replace_sidedata_info(rev, entry[8], entry[9]) - packed = self._io.packentry(entry, self.node, self.version, rev) + packed = self.index.entry_binary(rev, self.version) fp.write(packed) diff -r 824ee4aaa09b -r 0d8ff1f4ab0c rust/hg-cpython/src/revlog.rs --- a/rust/hg-cpython/src/revlog.rs Thu Apr 15 12:08:34 2021 +0200 +++ b/rust/hg-cpython/src/revlog.rs Thu Apr 08 00:01:11 2021 +0200 @@ -172,6 +172,11 @@ self.call_cindex(py, "clearcaches", args, kw) } + /// return the raw binary string representing a revision + def entry_binary(&self, *args, **kw) -> PyResult { + self.call_cindex(py, "entry_binary", args, kw) + } + /// get an index entry def get(&self, *args, **kw) -> PyResult { self.call_cindex(py, "get", args, kw)