nodemap: track the tip_node for validation
authorPierre-Yves David <pierre-yves.david@octobus.net>
Thu, 27 Feb 2020 16:32:43 +0100
changeset 44515 6c906eaedd0d
parent 44514 15a033cabc19
child 44516 64e2f603de9d
nodemap: track the tip_node for validation Differential Revision: https://phab.mercurial-scm.org/D8184
mercurial/configitems.py
mercurial/debugcommands.py
mercurial/revlog.py
mercurial/revlogutils/nodemap.py
tests/test-persistent-nodemap.t
--- a/mercurial/configitems.py	Thu Feb 27 15:42:04 2020 +0100
+++ b/mercurial/configitems.py	Thu Feb 27 16:32:43 2020 +0100
@@ -409,8 +409,6 @@
 #
 # * code/tests around aborted transaction
 # * code/tests around pending data for hooks
-# * code/tests around detection of invalid cache
-#   (eg: after strip from an incompatible client)
 # * regenerate a new nodemap when the unused/total ration is to high
 # * decide for a "status" of the persistent nodemap and associated location
 #   - part of the store next the revlog itself (new requirements)
--- a/mercurial/debugcommands.py	Thu Feb 27 15:42:04 2020 +0100
+++ b/mercurial/debugcommands.py	Thu Feb 27 16:32:43 2020 +0100
@@ -2136,6 +2136,7 @@
             docket, data = nm_data
             ui.write((b"uid: %s\n") % docket.uid)
             ui.write((b"tip-rev: %d\n") % docket.tip_rev)
+            ui.write((b"tip-node: %s\n") % hex(docket.tip_node))
             ui.write((b"data-length: %d\n") % docket.data_length)
             ui.write((b"data-unused: %d\n") % docket.data_unused)
 
--- a/mercurial/revlog.py	Thu Feb 27 15:42:04 2020 +0100
+++ b/mercurial/revlog.py	Thu Feb 27 16:32:43 2020 +0100
@@ -643,8 +643,11 @@
             if use_nodemap:
                 nodemap_data = nodemaputil.persisted_data(self)
                 if nodemap_data is not None:
-                    self._nodemap_docket = nodemap_data[0]
-                    index.update_nodemap_data(*nodemap_data)
+                    docket = nodemap_data[0]
+                    if d[0][docket.tip_rev][7] == docket.tip_node:
+                        # no changelog tampering
+                        self._nodemap_docket = docket
+                        index.update_nodemap_data(*nodemap_data)
         except (ValueError, IndexError):
             raise error.RevlogError(
                 _(b"index %s is corrupted") % self.indexfile
--- a/mercurial/revlogutils/nodemap.py	Thu Feb 27 15:42:04 2020 +0100
+++ b/mercurial/revlogutils/nodemap.py	Thu Feb 27 16:32:43 2020 +0100
@@ -38,10 +38,12 @@
         return None
     offset += S_VERSION.size
     headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
-    uid_size, tip_rev, data_length, data_unused = headers
+    uid_size, tip_rev, data_length, data_unused, tip_node_size = headers
     offset += S_HEADER.size
     docket = NodeMapDocket(pdata[offset : offset + uid_size])
+    offset += uid_size
     docket.tip_rev = tip_rev
+    docket.tip_node = pdata[offset : offset + tip_node_size]
     docket.data_length = data_length
     docket.data_unused = data_unused
 
@@ -164,6 +166,7 @@
                     new_data = util.buffer(util.mmapread(fd, len(data)))
         target_docket.data_length = len(data)
     target_docket.tip_rev = revlog.tiprev()
+    target_docket.tip_node = revlog.node(target_docket.tip_rev)
     # EXP-TODO: if this is a cache, this should use a cache vfs, not a
     # store vfs
     with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp:
@@ -214,7 +217,7 @@
 # version 0 is experimental, no BC garantee, do no use outside of tests.
 ONDISK_VERSION = 0
 S_VERSION = struct.Struct(">B")
-S_HEADER = struct.Struct(">BQQQ")
+S_HEADER = struct.Struct(">BQQQQ")
 
 ID_SIZE = 8
 
@@ -242,6 +245,15 @@
         # the tipmost revision stored in the data file. This revision and all
         # revision before it are expected to be encoded in the data file.
         self.tip_rev = None
+        # the node of that tipmost revision, if it mismatch the current index
+        # data the docket is not valid for the current index and should be
+        # discarded.
+        #
+        # note: this method is not perfect as some destructive operation could
+        # preserve the same tip_rev + tip_node while altering lower revision.
+        # However this multiple other caches have the same vulnerability (eg:
+        # brancmap cache).
+        self.tip_node = None
         # the size (in bytes) of the persisted data to encode the nodemap valid
         # for `tip_rev`.
         #   - data file shorter than this are corrupted,
@@ -254,6 +266,7 @@
     def copy(self):
         new = NodeMapDocket(uid=self.uid)
         new.tip_rev = self.tip_rev
+        new.tip_node = self.tip_node
         new.data_length = self.data_length
         new.data_unused = self.data_unused
         return new
@@ -281,9 +294,11 @@
             self.tip_rev,
             self.data_length,
             self.data_unused,
+            len(self.tip_node),
         )
         data.append(S_HEADER.pack(*headers))
         data.append(self.uid)
+        data.append(self.tip_node)
         return b''.join(data)
 
 
--- a/tests/test-persistent-nodemap.t	Thu Feb 27 15:42:04 2020 +0100
+++ b/tests/test-persistent-nodemap.t	Thu Feb 27 16:32:43 2020 +0100
@@ -14,10 +14,11 @@
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5000
+  tip-node: 06ddac466af534d365326c13c3879f97caca3cb1
   data-length: 122880
   data-unused: 0
   $ f --size .hg/store/00changelog.n
-  .hg/store/00changelog.n: size=42
+  .hg/store/00changelog.n: size=70
 
 Simple lookup works
 
@@ -95,18 +96,20 @@
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5001
+  tip-node: 2dd9b5258caa46469ff07d4a3da1eb3529a51f49
   data-length: 122880
   data-unused: 0
 #else
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5001
+  tip-node: 2dd9b5258caa46469ff07d4a3da1eb3529a51f49
   data-length: 123072
   data-unused: 192
 #endif
 
   $ f --size .hg/store/00changelog.n
-  .hg/store/00changelog.n: size=42
+  .hg/store/00changelog.n: size=70
 
 (The pure code use the debug code that perform incremental update, the C code reencode from scratch)
 
@@ -148,6 +151,7 @@
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5002
+  tip-node: 6ce944fafcee85af91f29ea5b51654cc6101ad7e
   data-length: 123328
   data-unused: 384
   $ f --sha256 .hg/store/00changelog-*.nd --size
@@ -157,6 +161,7 @@
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5002
+  tip-node: 6ce944fafcee85af91f29ea5b51654cc6101ad7e
   data-length: 123328
   data-unused: 384
   $ f --sha256 .hg/store/00changelog-*.nd --size
@@ -166,6 +171,7 @@
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5002
+  tip-node: 6ce944fafcee85af91f29ea5b51654cc6101ad7e
   data-length: 122944
   data-unused: 0
   $ f --sha256 .hg/store/00changelog-*.nd --size
@@ -181,12 +187,14 @@
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5002
+  tip-node: 6ce944fafcee85af91f29ea5b51654cc6101ad7e
   data-length: 122944
   data-unused: 0
 #else
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5002
+  tip-node: 6ce944fafcee85af91f29ea5b51654cc6101ad7e
   data-length: 122944
   data-unused: 0
 #endif
@@ -215,6 +223,7 @@
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5003
+  tip-node: 5c049e9c4a4af159bdcd65dce1b6bf303a0da6cf
   data-length: 123200 (pure !)
   data-length: 123200 (rust !)
   data-length: 122944 (no-rust no-pure !)
@@ -225,7 +234,50 @@
   $ hg debugnodemap --metadata
   uid: ???????????????? (glob)
   tip-rev: 5002
+  tip-node: 6ce944fafcee85af91f29ea5b51654cc6101ad7e
   data-length: 122944
   data-unused: 0
   $ hg log -r "$NODE" -T '{rev}\n'
   5003
+
+changelog altered
+-----------------
+
+If the nodemap is not gated behind a requirements, an unaware client can alter
+the repository so the revlog used to generate the nodemap is not longer
+compatible with the persistent nodemap. We need to detect that.
+
+  $ hg up "$NODE~5"
+  0 files updated, 0 files merged, 2 files removed, 0 files unresolved
+  $ echo bar > babar
+  $ hg add babar
+  $ hg ci -m 'babar'
+  created new head
+  $ OTHERNODE=`hg log -r tip -T '{node}\n'`
+  $ hg log -r "$OTHERNODE" -T '{rev}\n'
+  5004
+
+  $ hg --config extensions.strip= strip --rev "$NODE~1" --no-backup
+
+the nodemap should detect the changelog have been tampered with and recover.
+
+  $ hg debugnodemap --metadata
+  uid: ???????????????? (glob)
+  tip-rev: 5002
+  tip-node: 42bf3068c7ddfdfded53c4eb11d02266faeebfee
+  data-length: 123456 (pure !)
+  data-length: 246464 (rust !)
+  data-length: 123008 (no-pure no-rust !)
+  data-unused: 448 (pure !)
+  data-unused: 123904 (rust !)
+  data-unused: 0 (no-pure no-rust !)
+
+  $ cp -f ../tmp-copies/* .hg/store/
+  $ hg debugnodemap --metadata
+  uid: ???????????????? (glob)
+  tip-rev: 5002
+  tip-node: 6ce944fafcee85af91f29ea5b51654cc6101ad7e
+  data-length: 122944
+  data-unused: 0
+  $ hg log -r "$OTHERNODE" -T '{rev}\n'
+  5002