revlogv2: use a unique filename for data
author Pierre-Yves David <pierre-yves.david@octobus.net>
Wed, 19 May 2021 16:55:36 +0200
changeset 47324 0a3fa41fa719
parent 47323 f612db768c7a
child 47325 f286d715f9ab
revlogv2: use a unique filename for data. Having a unique data filename will allow unambiguous rewriting of revlog content, which is useful to clarify the handling of operations such as censoring or stripping. Differential Revision: https://phab.mercurial-scm.org/D10772
mercurial/revlog.py
mercurial/revlogutils/docket.py
mercurial/store.py
tests/test-revlog-v2.t
--- a/mercurial/revlog.py	Tue May 18 15:07:17 2021 +0200
+++ b/mercurial/revlog.py	Wed May 19 16:55:36 2021 +0200
@@ -628,7 +628,9 @@
             # main docket, so disable it for now.
             self._nodemap_file = None
 
-        if self.postfix is None:
+        if self._docket is not None:
+            self._datafile = self._docket.data_filepath()
+        elif self.postfix is None:
             self._datafile = b'%s.d' % self.radix
         else:
             self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
--- a/mercurial/revlogutils/docket.py	Tue May 18 15:07:17 2021 +0200
+++ b/mercurial/revlogutils/docket.py	Wed May 19 16:55:36 2021 +0200
@@ -89,12 +89,13 @@
 #          |   This is mandatory as docket must be compatible with the previous
 #          |   revlog index header.
 # * 1 bytes: size of index uuid
+# * 1 bytes: size of data uuid
 # * 8 bytes: size of index-data
 # * 8 bytes: pending size of index-data
 # * 8 bytes: size of data
 # * 8 bytes: pending size of data
 # * 1 bytes: default compression header
-S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'BLLLLc')
+S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'BBLLLLc')
 
 
 class RevlogDocket(object):
@@ -106,6 +107,7 @@
         use_pending=False,
         version_header=None,
         index_uuid=None,
+        data_uuid=None,
         index_end=0,
         pending_index_end=0,
         data_end=0,
@@ -119,6 +121,7 @@
         self._path = revlog._docket_file
         self._opener = revlog.opener
         self._index_uuid = index_uuid
+        self._data_uuid = data_uuid
         # thes asserts should be True as long as we have a single index filename
         assert index_end <= pending_index_end
         assert data_end <= pending_data_end
@@ -141,6 +144,13 @@
             self._index_uuid = make_uid()
         return b"%s-%s.idx" % (self._radix, self._index_uuid)
 
+    def data_filepath(self):
+        """file path to the current data file associated to this docket"""
+        # very simplistic version at first
+        if self._data_uuid is None:
+            self._data_uuid = make_uid()
+        return b"%s-%s.dat" % (self._radix, self._data_uuid)
+
     @property
     def index_end(self):
         return self._index_end
@@ -195,6 +205,7 @@
         data = (
             self._version_header,
             len(self._index_uuid),
+            len(self._data_uuid),
             official_index_end,
             self._index_end,
             official_data_end,
@@ -204,6 +215,7 @@
         s = []
         s.append(S_HEADER.pack(*data))
         s.append(self._index_uuid)
+        s.append(self._data_uuid)
         return b''.join(s)
 
 
@@ -230,16 +242,20 @@
     index_uuid_size = header[1]
     index_uuid = data[offset : offset + index_uuid_size]
     offset += index_uuid_size
-    index_size = header[2]
-    pending_index_size = header[3]
-    data_size = header[4]
-    pending_data_size = header[5]
-    default_compression_header = header[6]
+    data_uuid_size = header[2]
+    data_uuid = data[offset : offset + data_uuid_size]
+    offset += data_uuid_size
+    index_size = header[3]
+    pending_index_size = header[4]
+    data_size = header[5]
+    pending_data_size = header[6]
+    default_compression_header = header[7]
     docket = RevlogDocket(
         revlog,
         use_pending=use_pending,
         version_header=version_header,
         index_uuid=index_uuid,
+        data_uuid=data_uuid,
         index_end=index_size,
         pending_index_end=pending_index_size,
         data_end=data_size,
--- a/mercurial/store.py	Tue May 18 15:07:17 2021 +0200
+++ b/mercurial/store.py	Wed May 19 16:55:36 2021 +0200
@@ -389,7 +389,14 @@
 ]
 
 REVLOG_FILES_MAIN_EXT = (b'.i', b'i.tmpcensored')
-REVLOG_FILES_OTHER_EXT = (b'.idx', b'.d', b'.n', b'.nd', b'd.tmpcensored')
+REVLOG_FILES_OTHER_EXT = (
+    b'.idx',
+    b'.d',
+    b'.dat',
+    b'.n',
+    b'.nd',
+    b'd.tmpcensored',
+)
 # files that are "volatile" and might change between listing and streaming
 #
 # note: the ".nd" file are nodemap data and won't "change" but they might be
@@ -414,6 +421,7 @@
         if f.endswith(REVLOG_FILES_VOLATILE_EXT):
             t |= FILEFLAGS_VOLATILE
         return t
+    return None
 
 
 # the file is part of changelog data
@@ -753,6 +761,7 @@
             ef = self.encode(f)
             try:
                 t = revlog_type(f)
+                assert t is not None, f
                 t |= FILEFLAGS_FILELOG
                 yield t, f, ef, self.getsize(ef)
             except OSError as err:
--- a/tests/test-revlog-v2.t	Tue May 18 15:07:17 2021 +0200
+++ b/tests/test-revlog-v2.t	Wed May 19 16:55:36 2021 +0200
@@ -58,6 +58,7 @@
   date:        Thu Jan 01 00:00:00 1970 +0000
   summary:     initial
   
+
 Header written as expected
 
   $ f --hexdump --bytes 4 .hg/store/00changelog.i
@@ -77,9 +78,9 @@
 - a data file
 
   $ ls .hg/store/00changelog* .hg/store/00manifest*
-  .hg/store/00changelog-b870a51b.idx
-  .hg/store/00changelog.d
+  .hg/store/00changelog-6b8ab34b.dat
+  .hg/store/00changelog-88698448.idx
   .hg/store/00changelog.i
-  .hg/store/00manifest-88698448.idx
-  .hg/store/00manifest.d
+  .hg/store/00manifest-1335303a.dat
+  .hg/store/00manifest-b875dfc5.idx
   .hg/store/00manifest.i