# HG changeset patch # User Pierre-Yves David # Date 1620058756 -7200 # Node ID 130c9f7ed9147092a0132af57d095363f49bba31 # Parent 013c645dd28c3ba57e1524a2b0cbb0f43aaaf8db revlog: add a "data compression mode" entry in the index tuple That will make it possible to keep track of compression information in the revlog index, opening the way to more efficient revision restoration (in native code, but the python usage is already defeating performance work). We start by adding a new entry to the index tuple, using a value matching the current behavior. We will introduce storage and other values in later changesets. Differential Revision: https://phab.mercurial-scm.org/D10646 diff -r 013c645dd28c -r 130c9f7ed914 mercurial/bundlerepo.py --- a/mercurial/bundlerepo.py Tue May 04 01:15:03 2021 +0200 +++ b/mercurial/bundlerepo.py Mon May 03 18:19:16 2021 +0200 @@ -105,6 +105,7 @@ node, 0, 0, + revlog_constants.COMP_MODE_INLINE, ) self.index.append(e) self.bundlerevs.add(n) diff -r 013c645dd28c -r 130c9f7ed914 mercurial/cext/parsers.c --- a/mercurial/cext/parsers.c Tue May 04 01:15:03 2021 +0200 +++ b/mercurial/cext/parsers.c Mon May 03 18:19:16 2021 +0200 @@ -668,7 +668,7 @@ void manifest_module_init(PyObject *mod); void revlog_module_init(PyObject *mod); -static const int version = 18; +static const int version = 19; static void module_init(PyObject *mod) { diff -r 013c645dd28c -r 130c9f7ed914 mercurial/cext/revlog.c --- a/mercurial/cext/revlog.c Tue May 04 01:15:03 2021 +0200 +++ b/mercurial/cext/revlog.c Mon May 03 18:19:16 2021 +0200 @@ -118,9 +118,9 @@ static int index_find_node(indexObject *self, const char *node); #if LONG_MAX == 0x7fffffffL -static const char *const tuple_format = PY23("Kiiiiiis#Ki", "Kiiiiiiy#Ki"); +static const char *const tuple_format = PY23("Kiiiiiis#KiB", "Kiiiiiiy#KiB"); #else -static const char *const tuple_format = PY23("kiiiiiis#ki", "kiiiiiiy#ki"); +static const char *const tuple_format = PY23("kiiiiiis#kiB", "kiiiiiiy#kiB"); #endif /* A 
RevlogNG v1 index entry is 64 bytes long. */ @@ -132,6 +132,8 @@ static const long format_v1 = 1; /* Internal only, could be any number */ static const long format_v2 = 2; /* Internal only, could be any number */ +static const char comp_mode_inline = 2; + static void raise_revlog_error(void) { PyObject *mod = NULL, *dict = NULL, *errclass = NULL; @@ -294,6 +296,7 @@ uint64_t offset_flags, sidedata_offset; int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2, sidedata_comp_len; + char data_comp_mode; const char *c_node_id; const char *data; Py_ssize_t length = index_length(self); @@ -340,9 +343,11 @@ sidedata_comp_len = getbe32(data + 72); } + data_comp_mode = comp_mode_inline; return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2, c_node_id, - self->nodelen, sidedata_offset, sidedata_comp_len); + self->nodelen, sidedata_offset, sidedata_comp_len, + data_comp_mode); } /* * Pack header information in binary @@ -443,6 +448,7 @@ { uint64_t offset_flags, sidedata_offset; int rev, comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2; + char data_comp_mode; Py_ssize_t c_node_id_len, sidedata_comp_len; const char *c_node_id; char *data; @@ -450,8 +456,9 @@ if (!PyArg_ParseTuple(obj, tuple_format, &offset_flags, &comp_len, &uncomp_len, &base_rev, &link_rev, &parent_1, &parent_2, &c_node_id, &c_node_id_len, - &sidedata_offset, &sidedata_comp_len)) { - PyErr_SetString(PyExc_TypeError, "10-tuple required"); + &sidedata_offset, &sidedata_comp_len, + &data_comp_mode)) { + PyErr_SetString(PyExc_TypeError, "11-tuple required"); return NULL; } @@ -459,6 +466,12 @@ PyErr_SetString(PyExc_TypeError, "invalid node"); return NULL; } + if (data_comp_mode != comp_mode_inline) { + PyErr_Format(PyExc_ValueError, + "invalid data compression mode: %i", + data_comp_mode); + return NULL; + } if (self->new_length == self->added_length) { size_t new_added_length = @@ -2761,9 +2774,9 @@ self->entry_size = v1_entry_size; } 
- self->nullentry = - Py_BuildValue(PY23("iiiiiiis#ii", "iiiiiiiy#ii"), 0, 0, 0, -1, -1, - -1, -1, nullid, self->nodelen, 0, 0); + self->nullentry = Py_BuildValue(PY23("iiiiiiis#iiB", "iiiiiiiy#iiB"), 0, + 0, 0, -1, -1, -1, -1, nullid, + self->nodelen, 0, 0, comp_mode_inline); if (!self->nullentry) return -1; diff -r 013c645dd28c -r 130c9f7ed914 mercurial/policy.py --- a/mercurial/policy.py Tue May 04 01:15:03 2021 +0200 +++ b/mercurial/policy.py Mon May 03 18:19:16 2021 +0200 @@ -80,7 +80,7 @@ ('cext', 'bdiff'): 3, ('cext', 'mpatch'): 1, ('cext', 'osutil'): 4, - ('cext', 'parsers'): 18, + ('cext', 'parsers'): 19, } # map import request to other package or module diff -r 013c645dd28c -r 130c9f7ed914 mercurial/pure/parsers.py --- a/mercurial/pure/parsers.py Tue May 04 01:15:03 2021 +0200 +++ b/mercurial/pure/parsers.py Mon May 03 18:19:16 2021 +0200 @@ -54,7 +54,19 @@ # Size of a C long int, platform independent int_size = struct.calcsize(b'>i') # An empty index entry, used as a default value to be overridden, or nullrev - null_item = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid, 0, 0) + null_item = ( + 0, + 0, + 0, + -1, + -1, + -1, + -1, + sha1nodeconstants.nullid, + 0, + 0, + revlog_constants.COMP_MODE_INLINE, + ) @util.propertycache def entry_size(self): @@ -135,7 +147,7 @@ def _unpack_entry(self, data): r = self.index_format.unpack(data) - r = r + (0, 0) + r = r + (0, 0, revlog_constants.COMP_MODE_INLINE) return r def pack_header(self, header): @@ -303,16 +315,17 @@ self._extra[rev - self._lgt] = new def _unpack_entry(self, data): - return self.index_format.unpack(data) + return self.index_format.unpack(data) + ( + revlog_constants.COMP_MODE_INLINE, + ) def _pack_entry(self, entry): - return self.index_format.pack(*entry) + return self.index_format.pack(*entry[:10]) def entry_binary(self, rev): """return the raw binary string representing a revision""" entry = self[rev] - p = revlog_constants.INDEX_ENTRY_V2.pack(*entry) - return p + return 
self._pack_entry(entry) def pack_header(self, header): """pack header information as binary""" diff -r 013c645dd28c -r 130c9f7ed914 mercurial/revlog.py --- a/mercurial/revlog.py Tue May 04 01:15:03 2021 +0200 +++ b/mercurial/revlog.py Mon May 03 18:19:16 2021 +0200 @@ -35,6 +35,7 @@ from .pycompat import getattr from .revlogutils.constants import ( ALL_KINDS, + COMP_MODE_INLINE, FEATURES_BY_VERSION, FLAG_GENERALDELTA, FLAG_INLINE_DATA, @@ -336,6 +337,12 @@ [9] sidedata chunk length: The size, in bytes, of the revision's side-data chunk. + + [10] data compression mode: + two bits that detail the way the data chunk is compressed on disk. + (see "COMP_MODE_*" constants for details). For revlog version 0 and + 1 this will always be COMP_MODE_INLINE. + """ _flagserrorclass = error.RevlogError @@ -2474,6 +2481,7 @@ node, sidedata_offset, len(serialized_sidedata), + COMP_MODE_INLINE, ) self.index.append(e) diff -r 013c645dd28c -r 130c9f7ed914 mercurial/revlogutils/constants.py --- a/mercurial/revlogutils/constants.py Tue May 04 01:15:03 2021 +0200 +++ b/mercurial/revlogutils/constants.py Mon May 03 18:19:16 2021 +0200 @@ -1,4 +1,4 @@ -# revlogdeltas.py - constant used for revlog logic +# revlogdeltas.py - constant used for revlog logic. # # Copyright 2005-2007 Olivia Mackall # Copyright 2018 Octobus @@ -114,6 +114,14 @@ # bitmark for flags that could cause rawdata content change REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED +## chunk compression mode constants: +# These constants are used in revlog version >=2 to denote the compression used +# for a chunk. + +# Chunk use a compression mode stored "inline" at the start of the chunk +# itself. 
This is the mode always used for revlog version "0" and "1" +COMP_MODE_INLINE = 2 + SUPPORTED_FLAGS = { REVLOGV0: REVLOGV0_FLAGS, REVLOGV1: REVLOGV1_FLAGS, @@ -152,4 +160,5 @@ }, } + SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000 diff -r 013c645dd28c -r 130c9f7ed914 mercurial/revlogutils/revlogv0.py --- a/mercurial/revlogutils/revlogv0.py Tue May 04 01:15:03 2021 +0200 +++ b/mercurial/revlogutils/revlogv0.py Mon May 03 18:19:16 2021 +0200 @@ -9,6 +9,7 @@ from ..node import sha1nodeconstants from .constants import ( + COMP_MODE_INLINE, INDEX_ENTRY_V0, ) from ..i18n import _ @@ -42,7 +43,19 @@ class revlogoldindex(list): entry_size = INDEX_ENTRY_V0.size - null_item = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid, 0, 0) + null_item = ( + 0, + 0, + 0, + -1, + -1, + -1, + -1, + sha1nodeconstants.nullid, + 0, + 0, + COMP_MODE_INLINE, + ) @property def nodemap(self): @@ -138,6 +151,7 @@ e[6], 0, # no side data support 0, # no side data support + COMP_MODE_INLINE, ) index.append(e2) nodemap[e[6]] = n diff -r 013c645dd28c -r 130c9f7ed914 mercurial/unionrepo.py --- a/mercurial/unionrepo.py Tue May 04 01:15:03 2021 +0200 +++ b/mercurial/unionrepo.py Mon May 03 18:19:16 2021 +0200 @@ -31,6 +31,10 @@ vfs as vfsmod, ) +from .revlogutils import ( + constants as revlog_constants, +) + class unionrevlog(revlog.revlog): def __init__(self, opener, radix, revlog2, linkmapper): @@ -65,6 +69,7 @@ node, _sdo, _sds, + _dcm, ) = rev flags = _start & 0xFFFF @@ -99,6 +104,7 @@ node, 0, # sidedata offset 0, # sidedata size + revlog_constants.COMP_MODE_INLINE, ) self.index.append(e) self.bundlerevs.add(n) diff -r 013c645dd28c -r 130c9f7ed914 tests/test-parseindex2.py --- a/tests/test-parseindex2.py Tue May 04 01:15:03 2021 +0200 +++ b/tests/test-parseindex2.py Mon May 03 18:19:16 2021 +0200 @@ -21,6 +21,9 @@ policy, pycompat, ) +from mercurial.revlogutils import ( + constants, +) parsers = policy.importmod('parsers') @@ -49,7 +52,7 @@ cache = (0, data) while off <= l: e = 
struct.unpack(indexformatng, data[off : off + s]) - e = e + (0, 0) + e = e + (0, 0, constants.COMP_MODE_INLINE) nodemap[e[7]] = n append(e) n += 1 @@ -59,7 +62,7 @@ else: while off <= l: e = struct.unpack(indexformatng, data[off : off + s]) - e = e + (0, 0) + e = e + (0, 0, constants.COMP_MODE_INLINE) nodemap[e[7]] = n append(e) n += 1 @@ -242,7 +245,19 @@ break def testminusone(self): - want = (0, 0, 0, -1, -1, -1, -1, sha1nodeconstants.nullid, 0, 0) + want = ( + 0, + 0, + 0, + -1, + -1, + -1, + -1, + sha1nodeconstants.nullid, + 0, + 0, + constants.COMP_MODE_INLINE, + ) index, junk = parsers.parse_index2(data_inlined, True) got = index[-1] self.assertEqual(want, got) # inline data @@ -264,7 +279,20 @@ # node won't matter for this test, let's just make sure # they don't collide. Other data don't matter either. node = hexrev(p1) + hexrev(p2) + b'.' * 12 - index.append((0, 0, 12, 1, 34, p1, p2, node, 0, 0)) + e = ( + 0, + 0, + 12, + 1, + 34, + p1, + p2, + node, + 0, + 0, + constants.COMP_MODE_INLINE, + ) + index.append(e) appendrev(4) appendrev(5)