revlogv2: don't assume that the sidedata of the last rev is right after data
author Raphaël Gomès <rgomes@octobus.net>
Fri, 19 Feb 2021 11:07:10 +0100
changeset 46710 4cd214c9948d
parent 46709 3d740058b467
child 46711 a41565bef69f
revlogv2: don't assume that the sidedata of the last rev is right after data. We are going to be rewriting sidedata soon: it is going to be appended to the revlog data file, meaning that the data and the sidedata might not be contiguous. Differential Revision: https://phab.mercurial-scm.org/D10025
mercurial/revlog.py
--- a/mercurial/revlog.py	Mon Jan 18 11:44:51 2021 +0100
+++ b/mercurial/revlog.py	Fri Feb 19 11:07:10 2021 +0100
@@ -920,7 +920,7 @@
     # Derived from index values.
 
     def end(self, rev):
-        return self.start(rev) + self.length(rev) + self.sidedata_length(rev)
+        return self.start(rev) + self.length(rev)
 
     def parents(self, node):
         i = self.index
@@ -2331,7 +2331,8 @@
 
         curr = len(self)
         prev = curr - 1
-        offset = self.end(prev)
+
+        offset = self._get_data_offset(prev)
 
         if self._concurrencychecker:
             if self._inline:
@@ -2417,6 +2418,26 @@
         self._chainbasecache[curr] = deltainfo.chainbase
         return curr
 
+    def _get_data_offset(self, prev):
+        """Returns the current offset in the (in-transaction) data file.
+        Versions < 2 of the revlog can get this O(1); revlog v2 needs a docket
+        file to store that information: since sidedata can be rewritten to the
+        end of the data file within a transaction, you can have cases where, for
+        example, rev `n` does not have sidedata while rev `n - 1` does, leading
+        to `n - 1`'s sidedata being written after `n`'s data.
+
+        TODO cache this in a docket file before getting out of experimental."""
+        if self.version & 0xFFFF != REVLOGV2:
+            return self.end(prev)
+
+        offset = 0
+        for rev, entry in enumerate(self.index):
+            sidedata_end = entry[8] + entry[9]
+            # Sidedata for a previous rev has potentially been written after
+            # this rev's end, so take the max.
+            offset = max(self.end(rev), offset, sidedata_end)
+        return offset
+
     def _writeentry(
         self, transaction, ifh, dfh, entry, data, link, offset, sidedata
     ):