rust/hg-core/src/repo.rs
changeset 50252 a6b8b1ab9116
parent 49937 750409505286
parent 50245 dbe09fb038fc
child 50658 1e2c6cda2309
--- a/rust/hg-core/src/repo.rs	Thu Mar 02 04:16:47 2023 +0100
+++ b/rust/hg-core/src/repo.rs	Thu Mar 02 19:02:52 2023 +0100
@@ -1,6 +1,7 @@
 use crate::changelog::Changelog;
 use crate::config::{Config, ConfigError, ConfigParseError};
 use crate::dirstate::DirstateParents;
+use crate::dirstate_tree::dirstate_map::DirstateMapWriteMode;
 use crate::dirstate_tree::on_disk::Docket as DirstateDocket;
 use crate::dirstate_tree::owning::OwningDirstateMap;
 use crate::errors::HgResultExt;
@@ -9,6 +10,7 @@
 use crate::manifest::{Manifest, Manifestlog};
 use crate::revlog::filelog::Filelog;
 use crate::revlog::RevlogError;
+use crate::utils::debug::debug_wait_for_file_or_print;
 use crate::utils::files::get_path_from_bytes;
 use crate::utils::hg_path::HgPath;
 use crate::utils::SliceExt;
@@ -22,6 +24,10 @@
 use std::io::Write as IoWrite;
 use std::path::{Path, PathBuf};
 
+const V2_MAX_READ_ATTEMPTS: usize = 5;
+
+type DirstateMapIdentity = (Option<u64>, Option<Vec<u8>>, usize);
+
 /// A repository on disk
 pub struct Repo {
     working_directory: PathBuf,
@@ -30,7 +36,6 @@
     requirements: HashSet<String>,
     config: Config,
     dirstate_parents: LazyCell<DirstateParents>,
-    dirstate_data_file_uuid: LazyCell<Option<Vec<u8>>>,
     dirstate_map: LazyCell<OwningDirstateMap>,
     changelog: LazyCell<Changelog>,
     manifestlog: LazyCell<Manifestlog>,
@@ -180,7 +185,6 @@
             dot_hg,
             config: repo_config,
             dirstate_parents: LazyCell::new(),
-            dirstate_data_file_uuid: LazyCell::new(),
             dirstate_map: LazyCell::new(),
             changelog: LazyCell::new(),
             manifestlog: LazyCell::new(),
@@ -254,6 +258,15 @@
             .unwrap_or_default())
     }
 
+    fn dirstate_identity(&self) -> Result<Option<u64>, HgError> {
+        use std::os::unix::fs::MetadataExt;
+        Ok(self
+            .hg_vfs()
+            .symlink_metadata("dirstate")
+            .io_not_found_as_none()?
+            .map(|meta| meta.ino()))
+    }
+
     pub fn dirstate_parents(&self) -> Result<DirstateParents, HgError> {
         Ok(*self
             .dirstate_parents
@@ -263,15 +276,10 @@
     fn read_dirstate_parents(&self) -> Result<DirstateParents, HgError> {
         let dirstate = self.dirstate_file_contents()?;
         let parents = if dirstate.is_empty() {
-            if self.has_dirstate_v2() {
-                self.dirstate_data_file_uuid.set(None);
-            }
             DirstateParents::NULL
         } else if self.has_dirstate_v2() {
             let docket =
                 crate::dirstate_tree::on_disk::read_docket(&dirstate)?;
-            self.dirstate_data_file_uuid
-                .set(Some(docket.uuid.to_owned()));
             docket.parents()
         } else {
             *crate::dirstate::parsers::parse_dirstate_parents(&dirstate)?
@@ -280,57 +288,179 @@
         Ok(parents)
     }
 
-    fn read_dirstate_data_file_uuid(
+    /// Returns the information read from the dirstate docket necessary to
+    /// check if the data file has been updated/deleted by another process
+    /// since we last read the dirstate.
+    /// Namely, the inode, data file uuid and the data size.
+    fn get_dirstate_data_file_integrity(
         &self,
-    ) -> Result<Option<Vec<u8>>, HgError> {
+    ) -> Result<DirstateMapIdentity, HgError> {
         assert!(
             self.has_dirstate_v2(),
             "accessing dirstate data file ID without dirstate-v2"
         );
+        // Get the identity before the contents since we could have a race
+        // between the two. Having an identity that is too old is fine, but
+        // one that is younger than the content change is bad.
+        let identity = self.dirstate_identity()?;
         let dirstate = self.dirstate_file_contents()?;
         if dirstate.is_empty() {
             self.dirstate_parents.set(DirstateParents::NULL);
-            Ok(None)
+            Ok((identity, None, 0))
         } else {
             let docket =
                 crate::dirstate_tree::on_disk::read_docket(&dirstate)?;
             self.dirstate_parents.set(docket.parents());
-            Ok(Some(docket.uuid.to_owned()))
+            Ok((identity, Some(docket.uuid.to_owned()), docket.data_size()))
         }
     }
 
     fn new_dirstate_map(&self) -> Result<OwningDirstateMap, DirstateError> {
+        if self.has_dirstate_v2() {
+            // The v2 dirstate is split into a docket and a data file.
+            // Since we don't always take the `wlock` to read it
+            // (like in `hg status`), it is susceptible to races.
+            // A simple retry method should be enough since full rewrites
+            // only happen when too much garbage data is present and
+            // this race is unlikely.
+            let mut tries = 0;
+
+            while tries < V2_MAX_READ_ATTEMPTS {
+                tries += 1;
+                match self.read_docket_and_data_file() {
+                    Ok(m) => {
+                        return Ok(m);
+                    }
+                    Err(e) => match e {
+                        DirstateError::Common(HgError::RaceDetected(
+                            context,
+                        )) => {
+                            log::info!(
+                                "dirstate read race detected {} (retry {}/{})",
+                                context,
+                                tries,
+                                V2_MAX_READ_ATTEMPTS,
+                            );
+                            continue;
+                        }
+                        _ => return Err(e),
+                    },
+                }
+            }
+            let error = HgError::abort(
+                format!("dirstate read race happened {tries} times in a row"),
+                255,
+                None,
+            );
+            Err(DirstateError::Common(error))
+        } else {
+            debug_wait_for_file_or_print(
+                self.config(),
+                "dirstate.pre-read-file",
+            );
+            let identity = self.dirstate_identity()?;
+            let dirstate_file_contents = self.dirstate_file_contents()?;
+            if dirstate_file_contents.is_empty() {
+                self.dirstate_parents.set(DirstateParents::NULL);
+                Ok(OwningDirstateMap::new_empty(Vec::new()))
+            } else {
+                let (map, parents) = OwningDirstateMap::new_v1(
+                    dirstate_file_contents,
+                    identity,
+                )?;
+                self.dirstate_parents.set(parents);
+                Ok(map)
+            }
+        }
+    }
+
+    fn read_docket_and_data_file(
+        &self,
+    ) -> Result<OwningDirstateMap, DirstateError> {
+        debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file");
         let dirstate_file_contents = self.dirstate_file_contents()?;
+        let identity = self.dirstate_identity()?;
         if dirstate_file_contents.is_empty() {
             self.dirstate_parents.set(DirstateParents::NULL);
-            if self.has_dirstate_v2() {
-                self.dirstate_data_file_uuid.set(None);
-            }
-            Ok(OwningDirstateMap::new_empty(Vec::new()))
-        } else if self.has_dirstate_v2() {
-            let docket = crate::dirstate_tree::on_disk::read_docket(
-                &dirstate_file_contents,
-            )?;
-            self.dirstate_parents.set(docket.parents());
-            self.dirstate_data_file_uuid
-                .set(Some(docket.uuid.to_owned()));
-            let data_size = docket.data_size();
-            let metadata = docket.tree_metadata();
-            if let Some(data_mmap) = self
+            return Ok(OwningDirstateMap::new_empty(Vec::new()));
+        }
+        let docket = crate::dirstate_tree::on_disk::read_docket(
+            &dirstate_file_contents,
+        )?;
+        debug_wait_for_file_or_print(
+            self.config(),
+            "dirstate.post-docket-read-file",
+        );
+        self.dirstate_parents.set(docket.parents());
+        let uuid = docket.uuid.to_owned();
+        let data_size = docket.data_size();
+
+        let context = "between reading dirstate docket and data file";
+        let race_error = HgError::RaceDetected(context.into());
+        let metadata = docket.tree_metadata();
+
+        let mut map = if crate::vfs::is_on_nfs_mount(docket.data_filename()) {
+            // Don't mmap on NFS to prevent `SIGBUS` error on deletion
+            let contents = self.hg_vfs().read(docket.data_filename());
+            let contents = match contents {
+                Ok(c) => c,
+                Err(HgError::IoError { error, context }) => {
+                    match error.raw_os_error().expect("real os error") {
+                        // 2 = ENOENT, No such file or directory
+                        // 116 = ESTALE, Stale NFS file handle
+                        //
+                        // TODO match on `error.kind()` when
+                        // `ErrorKind::StaleNetworkFileHandle` is stable.
+                        2 | 116 => {
+                            // Race where the data file was deleted right after
+                            // we read the docket, try again
+                            return Err(race_error.into());
+                        }
+                        _ => {
+                            return Err(
+                                HgError::IoError { error, context }.into()
+                            )
+                        }
+                    }
+                }
+                Err(e) => return Err(e.into()),
+            };
+            OwningDirstateMap::new_v2(
+                contents, data_size, metadata, uuid, identity,
+            )
+        } else {
+            match self
                 .hg_vfs()
                 .mmap_open(docket.data_filename())
-                .io_not_found_as_none()?
+                .io_not_found_as_none()
             {
-                OwningDirstateMap::new_v2(data_mmap, data_size, metadata)
-            } else {
-                OwningDirstateMap::new_v2(Vec::new(), data_size, metadata)
+                Ok(Some(data_mmap)) => OwningDirstateMap::new_v2(
+                    data_mmap, data_size, metadata, uuid, identity,
+                ),
+                Ok(None) => {
+                    // Race where the data file was deleted right after we
+                    // read the docket, try again
+                    return Err(race_error.into());
+                }
+                Err(e) => return Err(e.into()),
             }
-        } else {
-            let (map, parents) =
-                OwningDirstateMap::new_v1(dirstate_file_contents)?;
-            self.dirstate_parents.set(parents);
-            Ok(map)
-        }
+        }?;
+
+        let write_mode_config = self
+            .config()
+            .get_str(b"devel", b"dirstate.v2.data_update_mode")
+            .unwrap_or(Some("auto"))
+            .unwrap_or("auto"); // don't bother for devel options
+        let write_mode = match write_mode_config {
+            "auto" => DirstateMapWriteMode::Auto,
+            "force-new" => DirstateMapWriteMode::ForceNewDataFile,
+            "force-append" => DirstateMapWriteMode::ForceAppend,
+            _ => DirstateMapWriteMode::Auto,
+        };
+
+        map.with_dmap_mut(|m| m.set_write_mode(write_mode));
+
+        Ok(map)
     }
 
     pub fn dirstate_map(
@@ -421,13 +551,37 @@
         // it’s unset
         let parents = self.dirstate_parents()?;
         let (packed_dirstate, old_uuid_to_remove) = if self.has_dirstate_v2() {
-            let uuid_opt = self
-                .dirstate_data_file_uuid
-                .get_or_init(|| self.read_dirstate_data_file_uuid())?;
-            let uuid_opt = uuid_opt.as_ref();
-            let can_append = uuid_opt.is_some();
+            let (identity, uuid, data_size) =
+                self.get_dirstate_data_file_integrity()?;
+            let identity_changed = identity != map.old_identity();
+            let uuid_changed = uuid.as_deref() != map.old_uuid();
+            let data_length_changed = data_size != map.old_data_size();
+
+            if identity_changed || uuid_changed || data_length_changed {
+                // If any of identity, uuid or length have changed since
+                // last disk read, don't write.
+                // This is fine because either we're in a command that doesn't
+                // write anything too important (like `hg status`), or we're in
+                // `hg add` and we're supposed to have taken the lock before
+                // reading anyway.
+                //
+                // TODO complain loudly if we've changed anything important
+                // without taking the lock.
+                // (see `hg help config.format.use-dirstate-tracked-hint`)
+                log::debug!(
+                    "dirstate has changed since last read, not updating."
+                );
+                return Ok(());
+            }
+
+            let uuid_opt = map.old_uuid();
+            let write_mode = if uuid_opt.is_some() {
+                DirstateMapWriteMode::Auto
+            } else {
+                DirstateMapWriteMode::ForceNewDataFile
+            };
             let (data, tree_metadata, append, old_data_size) =
-                map.pack_v2(can_append)?;
+                map.pack_v2(write_mode)?;
 
             // Reuse the uuid, or generate a new one, keeping the old for
             // deletion.
@@ -470,7 +624,10 @@
             //   https://github.com/openzfs/zfs/issues/13370
             //
             if !append {
+                log::trace!("creating a new dirstate data file");
                 options.create_new(true);
+            } else {
+                log::trace!("appending to the dirstate data file");
             }
 
             let data_size = (|| {
@@ -499,6 +656,22 @@
 
             (packed_dirstate, old_uuid)
         } else {
+            let identity = self.dirstate_identity()?;
+            if identity != map.old_identity() {
+                // If identity changed since last disk read, don't write.
+                // This is fine because either we're in a command that doesn't
+                // write anything too important (like `hg status`), or we're in
+                // `hg add` and we're supposed to have taken the lock before
+                // reading anyway.
+                //
+                // TODO complain loudly if we've changed anything important
+                // without taking the lock.
+                // (see `hg help config.format.use-dirstate-tracked-hint`)
+                log::debug!(
+                    "dirstate has changed since last read, not updating."
+                );
+                return Ok(());
+            }
             (map.pack_v1(parents)?, None)
         };