changelogv2: use a dedicated on disk format for changelogv2
author Pierre-Yves David <pierre-yves.david@octobus.net>
Tue, 04 May 2021 11:20:10 +0200
changeset 47270 25ce16bf724b
parent 47269 6be2a7ca4b1d
child 47271 055f7b9f2307
changelogv2: use a dedicated on disk format for changelogv2. We drop two unused entries. This is mostly a proof of concept before starting to actually rework the format. Differential Revision: https://phab.mercurial-scm.org/D10667
mercurial/configitems.py
mercurial/pure/parsers.py
mercurial/revlog.py
mercurial/revlogutils/constants.py
--- a/mercurial/configitems.py	Tue May 04 14:18:06 2021 +0200
+++ b/mercurial/configitems.py	Tue May 04 11:20:10 2021 +0200
@@ -1344,7 +1344,6 @@
 # Experimental TODOs:
 #
 # * Same as for evlogv2 (but for the reduction of the number of files)
-# * drop the storage of the base
 # * Improvement to investigate
 #   - storing .hgtags fnode
 #   - storing `rank` of changesets
--- a/mercurial/pure/parsers.py	Tue May 04 14:18:06 2021 +0200
+++ b/mercurial/pure/parsers.py	Tue May 04 11:20:10 2021 +0200
@@ -295,6 +295,10 @@
     return cls(data, inline), (0, data)
 
 
+def parse_index_cl_v2(data):
+    return IndexChangelogV2(data), None
+
+
 class IndexObject2(IndexObject):
     index_format = revlog_constants.INDEX_ENTRY_V2
 
@@ -355,6 +359,26 @@
         raise error.ProgrammingError(msg)
 
 
+class IndexChangelogV2(IndexObject2):
+    index_format = revlog_constants.INDEX_ENTRY_CL_V2
+
+    def _unpack_entry(self, rev, data, r=True):
+        items = self.index_format.unpack(data)
+        entry = items[:3] + (rev, rev) + items[3:8]
+        data_comp = items[8] & 3
+        sidedata_comp = (items[8] >> 2) & 3
+        return entry + (data_comp, sidedata_comp)
+
+    def _pack_entry(self, rev, entry):
+        assert entry[3] == rev, entry[3]
+        assert entry[4] == rev, entry[4]
+        data = entry[:3] + entry[5:10]
+        data_comp = entry[10] & 3
+        sidedata_comp = (entry[11] & 3) << 2
+        data += (data_comp | sidedata_comp,)
+        return self.index_format.pack(*data)
+
+
 def parse_index_devel_nodemap(data, inline):
     """like parse_index2, but alway return a PersistentNodeMapIndexObject"""
     return PersistentNodeMapIndexObject(data), None
--- a/mercurial/revlog.py	Tue May 04 14:18:06 2021 +0200
+++ b/mercurial/revlog.py	Tue May 04 11:20:10 2021 +0200
@@ -227,6 +227,15 @@
     return index, cache
 
 
+def parse_index_cl_v2(data, inline):
+    # parse the index data with the pure-Python implementation
+    assert not inline
+    from .pure.parsers import parse_index_cl_v2
+
+    index, cache = parse_index_cl_v2(data)
+    return index, cache
+
+
 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
 
     def parse_index_v1_nodemap(data, inline):
@@ -652,7 +661,7 @@
         elif self._format_version == REVLOGV2:
             self._parse_index = parse_index_v2
         elif self._format_version == CHANGELOGV2:
-            self._parse_index = parse_index_v2
+            self._parse_index = parse_index_cl_v2
         elif devel_nodemap:
             self._parse_index = parse_index_v1_nodemap
         elif use_rust_index:
--- a/mercurial/revlogutils/constants.py	Tue May 04 14:18:06 2021 +0200
+++ b/mercurial/revlogutils/constants.py	Tue May 04 11:20:10 2021 +0200
@@ -93,6 +93,20 @@
 INDEX_ENTRY_V2 = struct.Struct(b">Qiiiiii20s12xQiB19x")
 assert INDEX_ENTRY_V2.size == 32 * 3, INDEX_ENTRY_V2.size
 
+#  6 bytes: offset
+#  2 bytes: flags
+#  4 bytes: compressed length
+#  4 bytes: uncompressed length
+#  4 bytes: parent 1 rev
+#  4 bytes: parent 2 rev
+# 32 bytes: nodeid
+#  8 bytes: sidedata offset
+#  4 bytes: sidedata compressed length
+#  1 byte:  compression modes (bits 0-1: data_compression_mode, bits 2-3: sidedata_compression_mode)
+# 27 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
+INDEX_ENTRY_CL_V2 = struct.Struct(b">Qiiii20s12xQiB27x")
+assert INDEX_ENTRY_CL_V2.size == 32 * 3, INDEX_ENTRY_CL_V2.size
+
 # revlog index flags
 
 # For historical reasons, revlog's internal flags were exposed via the