dirstate-v2: add devel config option to control write behavior stable
authorRaphaël Gomès <rgomes@octobus.net>
Tue, 28 Feb 2023 15:49:53 +0100
branchstable
changeset 50222 ecd28d89c29e
parent 50221 1891086f6c7f
child 50223 3f34d800cc69
dirstate-v2: add devel config option to control write behavior This will help us to write predictable tests checking behavior in each case.
mercurial/configitems.py
mercurial/dirstatemap.py
rust/hg-core/src/dirstate_tree/dirstate_map.rs
rust/hg-core/src/dirstate_tree/on_disk.rs
rust/hg-core/src/repo.rs
rust/hg-cpython/src/dirstate/dirstate_map.rs
tests/test-dirstate-status-write-race.t
tests/test-dirstate.t
--- a/mercurial/configitems.py	Fri Feb 24 18:21:54 2023 +0100
+++ b/mercurial/configitems.py	Tue Feb 28 15:49:53 2023 +0100
@@ -640,6 +640,15 @@
     b'deprec-warn',
     default=False,
 )
+# possible values:
+# - auto (the default)
+# - force-append
+# - force-new
+coreconfigitem(
+    b'devel',
+    b'dirstate.v2.data_update_mode',
+    default="auto",
+)
 coreconfigitem(
     b'devel',
     b'disableloaddefaultcerts',
--- a/mercurial/dirstatemap.py	Fri Feb 24 18:21:54 2023 +0100
+++ b/mercurial/dirstatemap.py	Tue Feb 28 15:49:53 2023 +0100
@@ -33,6 +33,7 @@
 
 WRITE_MODE_AUTO = 0
 WRITE_MODE_FORCE_NEW = 1
+WRITE_MODE_FORCE_APPEND = 2
 
 
 class _dirstatemapcommon:
@@ -57,6 +58,16 @@
         self._parents = None
         self._dirtyparents = False
         self._docket = None
+        write_mode = ui.config(b"devel", b"dirstate.v2.data_update_mode")
+        if write_mode == b"auto":
+            self._write_mode = WRITE_MODE_AUTO
+        elif write_mode == b"force-append":
+            self._write_mode = WRITE_MODE_FORCE_APPEND
+        elif write_mode == b"force-new":
+            self._write_mode = WRITE_MODE_FORCE_NEW
+        else:
+            # unknown value, fallback to default
+            self._write_mode = WRITE_MODE_AUTO
 
         # for consistent view between _pl() and _read() invocations
         self._pendingmode = None
@@ -612,7 +623,7 @@
                 return
 
             # We can only append to an existing data file if there is one
-            write_mode = WRITE_MODE_AUTO
+            write_mode = self._write_mode
             if self.docket.uuid is None:
                 write_mode = WRITE_MODE_FORCE_NEW
             packed, meta, append = self._map.write_v2(write_mode)
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Fri Feb 24 18:21:54 2023 +0100
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Tue Feb 28 15:49:53 2023 +0100
@@ -42,6 +42,7 @@
 pub enum DirstateMapWriteMode {
     Auto,
     ForceNewDataFile,
+    ForceAppend,
 }
 
 #[derive(Debug)]
@@ -69,6 +70,9 @@
     pub(super) old_data_size: usize,
 
     pub(super) dirstate_version: DirstateVersion,
+
+    /// Controlled by config option `devel.dirstate.v2.data_update_mode`
+    pub(super) write_mode: DirstateMapWriteMode,
 }
 
 /// Using a plain `HgPathBuf` of the full path from the repository root as a
@@ -457,6 +461,7 @@
             unreachable_bytes: 0,
             old_data_size: 0,
             dirstate_version: DirstateVersion::V1,
+            write_mode: DirstateMapWriteMode::Auto,
         }
     }
 
@@ -525,8 +530,15 @@
     /// append to the existing data file that contains `self.on_disk` (true),
     /// or create a new data file from scratch (false).
     pub(super) fn write_should_append(&self) -> bool {
-        let ratio = self.unreachable_bytes as f32 / self.on_disk.len() as f32;
-        ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO
+        match self.write_mode {
+            DirstateMapWriteMode::ForceAppend => true,
+            DirstateMapWriteMode::ForceNewDataFile => false,
+            DirstateMapWriteMode::Auto => {
+                let ratio =
+                    self.unreachable_bytes as f32 / self.on_disk.len() as f32;
+                ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO
+            }
+        }
     }
 
     fn get_node<'tree>(
@@ -923,6 +935,10 @@
             *unreachable_bytes += path.len() as u32
         }
     }
+
+    pub(crate) fn set_write_mode(&mut self, write_mode: DirstateMapWriteMode) {
+        self.write_mode = write_mode;
+    }
 }
 
 /// Like `Iterator::filter_map`, but over a fallible iterator of `Result`s.
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs	Fri Feb 24 18:21:54 2023 +0100
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs	Tue Feb 28 15:49:53 2023 +0100
@@ -313,6 +313,7 @@
         unreachable_bytes: meta.unreachable_bytes.get(),
         old_data_size: on_disk.len(),
         dirstate_version: DirstateVersion::V2,
+        write_mode: DirstateMapWriteMode::Auto,
     };
     Ok(dirstate_map)
 }
@@ -641,6 +642,7 @@
     let append = match write_mode {
         DirstateMapWriteMode::Auto => dirstate_map.write_should_append(),
         DirstateMapWriteMode::ForceNewDataFile => false,
+        DirstateMapWriteMode::ForceAppend => true,
     };
     if append {
         log::trace!("appending to the dirstate data file");
--- a/rust/hg-core/src/repo.rs	Fri Feb 24 18:21:54 2023 +0100
+++ b/rust/hg-core/src/repo.rs	Tue Feb 28 15:49:53 2023 +0100
@@ -321,22 +321,39 @@
                 .set(Some(docket.uuid.to_owned()));
             let data_size = docket.data_size();
             let metadata = docket.tree_metadata();
-            if crate::vfs::is_on_nfs_mount(docket.data_filename()) {
-                // Don't mmap on NFS to prevent `SIGBUS` error on deletion
-                OwningDirstateMap::new_v2(
-                    self.hg_vfs().read(docket.data_filename())?,
-                    data_size,
-                    metadata,
-                )
-            } else if let Some(data_mmap) = self
-                .hg_vfs()
-                .mmap_open(docket.data_filename())
-                .io_not_found_as_none()?
-            {
-                OwningDirstateMap::new_v2(data_mmap, data_size, metadata)
-            } else {
-                OwningDirstateMap::new_v2(Vec::new(), data_size, metadata)
-            }
+            let mut map =
+                if crate::vfs::is_on_nfs_mount(docket.data_filename()) {
+                    // Don't mmap on NFS to prevent `SIGBUS` error on deletion
+                    OwningDirstateMap::new_v2(
+                        self.hg_vfs().read(docket.data_filename())?,
+                        data_size,
+                        metadata,
+                    )
+                } else if let Some(data_mmap) = self
+                    .hg_vfs()
+                    .mmap_open(docket.data_filename())
+                    .io_not_found_as_none()?
+                {
+                    OwningDirstateMap::new_v2(data_mmap, data_size, metadata)
+                } else {
+                    OwningDirstateMap::new_v2(Vec::new(), data_size, metadata)
+                }?;
+
+            let write_mode_config = self
+                .config()
+                .get_str(b"devel", b"dirstate.v2.data_update_mode")
+                .unwrap_or(Some("auto"))
+                .unwrap_or("auto"); // don't bother for devel options
+            let write_mode = match write_mode_config {
+                "auto" => DirstateMapWriteMode::Auto,
+                "force-new" => DirstateMapWriteMode::ForceNewDataFile,
+                "force-append" => DirstateMapWriteMode::ForceAppend,
+                _ => DirstateMapWriteMode::Auto,
+            };
+
+            map.with_dmap_mut(|m| m.set_write_mode(write_mode));
+
+            Ok(map)
         } else {
             let (map, parents) =
                 OwningDirstateMap::new_v1(dirstate_file_contents)?;
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs	Fri Feb 24 18:21:54 2023 +0100
+++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs	Tue Feb 28 15:49:53 2023 +0100
@@ -254,6 +254,7 @@
         let rust_write_mode = match write_mode {
             0 => DirstateMapWriteMode::Auto,
             1 => DirstateMapWriteMode::ForceNewDataFile,
+            2 => DirstateMapWriteMode::ForceAppend,
             _ => DirstateMapWriteMode::Auto, // XXX should we error out?
         };
         let result = inner.pack_v2(rust_write_mode);
--- a/tests/test-dirstate-status-write-race.t	Fri Feb 24 18:21:54 2023 +0100
+++ b/tests/test-dirstate-status-write-race.t	Tue Feb 28 15:49:53 2023 +0100
@@ -93,7 +93,7 @@
   $ hg add dir/o
   $ hg remove dir/nested/m
 
-  $ hg st
+  $ hg st --config devel.dirstate.v2.data_update_mode=force-new
   A dir/o
   R dir/nested/m
   ? dir/n
--- a/tests/test-dirstate.t	Fri Feb 24 18:21:54 2023 +0100
+++ b/tests/test-dirstate.t	Tue Feb 28 15:49:53 2023 +0100
@@ -211,8 +211,223 @@
 
 #endif
 
+(non-Rust always rewrites)
+
+Test the devel option to control write behavior
+==============================================
+
+Sometimes, debugging or testing the dirstate requires making sure that we have
+done a complete rewrite of the data file and have no unreachable data around,
+sometimes it requires we ensure we don't.
+
+We test the option to force this rewrite by creating the situation where an
+append would happen and check that it doesn't happen.
+
+  $ cd ..
+  $ hg init force-base
+  $ cd force-base
+  $ mkdir -p dir/nested dir2
+  $ touch -t 200001010000 f dir/nested/a dir/b dir/c dir/d dir2/e dir/nested dir dir2
+  $ hg commit -Aqm "recreate a bunch of files to facilitate append"
+  $ hg st --config devel.dirstate.v2.data_update_mode=force-new
+  $ cd ..
+
+#if dirstate-v2
+  $ hg -R force-base debugstate --docket | grep unused
+  number of unused bytes: 0
+
+Check with the option in "auto" mode
+------------------------------------
+  $ cp -a force-base append-mostly-no-force-rewrite
+  $ cd append-mostly-no-force-rewrite
+  $ current_uid=$(find_dirstate_uuid)
+
+Change mtime of dir on disk which will be recorded, causing a small enough change
+to warrant only an append
+
+  $ touch -t 202212010000 dir2
+  $ hg st \
+  > --config rhg.on-unsupported=abort \
+  > --config devel.dirstate.v2.data_update_mode=auto
+
+UUID hasn't changed and a non-zero number of unused bytes means we've appended
+
+  $ dirstate_uuid_has_not_changed
+  not testing because using Python implementation (no-rust no-rhg !)
+
+#if no-rust no-rhg
+The pure python implementation never appends at the time this is written.
+  $ hg debugstate --docket | grep unused
+  number of unused bytes: 0 (known-bad-output !)
+#else
+  $ hg debugstate --docket | grep unused
+  number of unused bytes: [1-9]\d* (re)
+#endif
+  $ cd ..
+
+Check the same scenario with the option set to "force-new"
+---------------------------------------------------------
+
+  $ cp -a force-base append-mostly-force-rewrite
+  $ cd append-mostly-force-rewrite
+  $ current_uid=$(find_dirstate_uuid)
+
+Change mtime of dir on disk which will be recorded, causing a small enough change
+to warrant only an append, but we force the rewrite
+
+  $ touch -t 202212010000 dir2
+  $ hg st \
+  > --config rhg.on-unsupported=abort \
+  > --config devel.dirstate.v2.data_update_mode=force-new
+
+UUID has changed and zero unused bytes means a full-rewrite happened
+
+
+#if no-rust no-rhg
+  $ dirstate_uuid_has_not_changed
+  not testing because using Python implementation
+#else
+  $ dirstate_uuid_has_not_changed
+  [1]
+#endif
+  $ hg debugstate --docket | grep unused
+  number of unused bytes: 0
+  $ cd ..
+
+
+Check the same scenario with the option set to "force-append"
+-------------------------------------------------------------
+
+(should behave the same as "auto" here)
+
+  $ cp -a force-base append-mostly-force-append
+  $ cd append-mostly-force-append
+  $ current_uid=$(find_dirstate_uuid)
+
+Change mtime of dir on disk which will be recorded, causing a small enough change
+to warrant only an append, which we are forcing here anyway.
+
+  $ touch -t 202212010000 dir2
+  $ hg st \
+  > --config rhg.on-unsupported=abort \
+  > --config devel.dirstate.v2.data_update_mode=force-append
+
+UUID has not changed and some unused bytes exist in the data file
+
+  $ dirstate_uuid_has_not_changed
+  not testing because using Python implementation (no-rust no-rhg !)
+
+#if no-rust no-rhg
+The pure python implementation never appends at the time this is written.
+  $ hg debugstate --docket | grep unused
+  number of unused bytes: 0 (known-bad-output !)
+#else
+  $ hg debugstate --docket | grep unused
+  number of unused bytes: [1-9]\d* (re)
+#endif
+  $ cd ..
+
+Check with the option in "auto" mode
+------------------------------------
+  $ cp -a force-base append-mostly-no-force-rewrite
+  $ cd append-mostly-no-force-rewrite
+  $ current_uid=$(find_dirstate_uuid)
+
+Change mtime of everything on disk causing a full rewrite
+
+  $ touch -t 202212010005 `hg files`
+  $ hg st \
+  > --config rhg.on-unsupported=abort \
+  > --config devel.dirstate.v2.data_update_mode=auto
+
+UUID has changed and zero unused bytes means we've rewritten.
+
+#if no-rust no-rhg
+  $ dirstate_uuid_has_not_changed
+  not testing because using Python implementation
+#else
+  $ dirstate_uuid_has_not_changed
+  [1]
+#endif
+
+  $ hg debugstate --docket | grep unused
+  number of unused bytes: 0 (known-bad-output !)
+  $ cd ..
+
+Check the same scenario with the option set to "force-new"
+---------------------------------------------------------
+
+(should be the same as auto)
+
+  $ cp -a force-base append-mostly-force-rewrite
+  $ cd append-mostly-force-rewrite
+  $ current_uid=$(find_dirstate_uuid)
+
+Change mtime of everything on disk causing a full rewrite
+
+  $ touch -t 202212010005 `hg files`
+  $ hg st \
+  > --config rhg.on-unsupported=abort \
+  > --config devel.dirstate.v2.data_update_mode=force-new
+
+UUID has changed and a zero number unused bytes means we've rewritten.
+
+
+#if no-rust no-rhg
+  $ dirstate_uuid_has_not_changed
+  not testing because using Python implementation
+#else
+  $ dirstate_uuid_has_not_changed
+  [1]
+#endif
+  $ hg debugstate --docket | grep unused
+  number of unused bytes: 0
+  $ cd ..
+
+
+Check the same scenario with the option set to "force-append"
+-------------------------------------------------------------
+
+Should append even if "auto" did not
+
+  $ cp -a force-base append-mostly-force-append
+  $ cd append-mostly-force-append
+  $ current_uid=$(find_dirstate_uuid)
+
+Change mtime of everything on disk causing a full rewrite
+
+  $ touch -t 202212010005 `hg files`
+  $ hg st \
+  > --config rhg.on-unsupported=abort \
+  > --config devel.dirstate.v2.data_update_mode=force-append
+
+UUID has not changed and some unused bytes exist in the data file
+
+  $ dirstate_uuid_has_not_changed
+  not testing because using Python implementation (no-rust no-rhg !)
+
+#if no-rust no-rhg
+The pure python implementation is never appending at the time this is written.
+  $ hg debugstate --docket | grep unused
+  number of unused bytes: 0 (known-bad-output !)
+#else
+  $ hg debugstate --docket | grep unused
+  number of unused bytes: [1-9]\d* (re)
+#endif
+  $ cd ..
+
+
+
+Get back into a state suitable for the test of the file.
+
+  $ cd ./append-mostly
+
+#else
+  $ cd ./u
+#endif
+
 Transaction compatibility
--------------------------
+=========================
 
 The transaction preserves the dirstate.
 We should make sure all of it (docket + data) is preserved