dirstate: deal with read-race for pure rust code path (rhg) stable
authorPierre-Yves David <pierre-yves.david@octobus.net>
Tue, 28 Feb 2023 19:36:46 +0100
branchstable
changeset 50239 491f3dd080eb
parent 50238 c9066fc609ef
child 50240 3ddff85fa2c8
dirstate: deal with read-race for pure rust code path (rhg) If we cannot read the dirstate data, this is probably because a writing process wrote it under our feet. So refresh the docket and try again a handful of time.
rust/hg-core/src/errors.rs
rust/hg-core/src/repo.rs
tests/test-dirstate-read-race.t
--- a/rust/hg-core/src/errors.rs	Tue Feb 28 23:35:52 2023 +0100
+++ b/rust/hg-core/src/errors.rs	Tue Feb 28 19:36:46 2023 +0100
@@ -46,6 +46,9 @@
 
     /// Censored revision data.
     CensoredNodeError,
+    /// A race condition has been detected. This *must* be handled locally
+    /// and not directly surface to the user.
+    RaceDetected(String),
 }
 
 /// Details about where an I/O error happened
@@ -111,6 +114,9 @@
                 write!(f, "encountered a censored node")
             }
             HgError::ConfigValueParseError(error) => error.fmt(f),
+            HgError::RaceDetected(context) => {
+                write!(f, "encountered a race condition {context}")
+            }
         }
     }
 }
--- a/rust/hg-core/src/repo.rs	Tue Feb 28 23:35:52 2023 +0100
+++ b/rust/hg-core/src/repo.rs	Tue Feb 28 19:36:46 2023 +0100
@@ -24,6 +24,8 @@
 use std::io::Write as IoWrite;
 use std::path::{Path, PathBuf};
 
+const V2_MAX_READ_ATTEMPTS: usize = 5;
+
 /// A repository on disk
 pub struct Repo {
     working_directory: PathBuf,
@@ -307,14 +309,49 @@
 
     fn new_dirstate_map(&self) -> Result<OwningDirstateMap, DirstateError> {
         if self.has_dirstate_v2() {
-            self.read_docket_and_data_file()
+            // The v2 dirstate is split into a docket and a data file.
+            // Since we don't always take the `wlock` to read it
+            // (like in `hg status`), it is susceptible to races.
+            // A simple retry method should be enough since full rewrites
+            // only happen when too much garbage data is present and
+            // this race is unlikely.
+            let mut tries = 0;
+
+            while tries < V2_MAX_READ_ATTEMPTS {
+                tries += 1;
+                match self.read_docket_and_data_file() {
+                    Ok(m) => {
+                        return Ok(m);
+                    }
+                    Err(e) => match e {
+                        DirstateError::Common(HgError::RaceDetected(
+                            context,
+                        )) => {
+                            log::info!(
+                                "dirstate read race detected {} (retry {}/{})",
+                                context,
+                                tries,
+                                V2_MAX_READ_ATTEMPTS,
+                            );
+                            continue;
+                        }
+                        _ => return Err(e.into()),
+                    },
+                }
+            }
+            let error = HgError::abort(
+                format!("dirstate read race happened {tries} times in a row"),
+                255,
+                None,
+            );
+            return Err(DirstateError::Common(error));
         } else {
             debug_wait_for_file_or_print(
                 self.config(),
                 "dirstate.pre-read-file",
             );
             let dirstate_file_contents = self.dirstate_file_contents()?;
-            if dirstate_file_contents.is_empty() {
+            return if dirstate_file_contents.is_empty() {
                 self.dirstate_parents.set(DirstateParents::NULL);
                 Ok(OwningDirstateMap::new_empty(Vec::new()))
             } else {
@@ -322,7 +359,7 @@
                     OwningDirstateMap::new_v1(dirstate_file_contents)?;
                 self.dirstate_parents.set(parents);
                 Ok(map)
-            }
+            };
         }
     }
 
@@ -347,22 +384,54 @@
         self.dirstate_data_file_uuid
             .set(Some(docket.uuid.to_owned()));
         let data_size = docket.data_size();
+
+        let context = "between reading dirstate docket and data file";
+        let race_error = HgError::RaceDetected(context.into());
         let metadata = docket.tree_metadata();
+
         let mut map = if crate::vfs::is_on_nfs_mount(docket.data_filename()) {
             // Don't mmap on NFS to prevent `SIGBUS` error on deletion
-            OwningDirstateMap::new_v2(
-                self.hg_vfs().read(docket.data_filename())?,
-                data_size,
-                metadata,
-            )
-        } else if let Some(data_mmap) = self
-            .hg_vfs()
-            .mmap_open(docket.data_filename())
-            .io_not_found_as_none()?
-        {
-            OwningDirstateMap::new_v2(data_mmap, data_size, metadata)
+            let contents = self.hg_vfs().read(docket.data_filename());
+            let contents = match contents {
+                Ok(c) => c,
+                Err(HgError::IoError { error, context }) => {
+                    match error.raw_os_error().expect("real os error") {
+                        // 2 = ENOENT, No such file or directory
+                        // 116 = ESTALE, Stale NFS file handle
+                        //
+                        // TODO match on `error.kind()` when
+                        // `ErrorKind::StaleNetworkFileHandle` is stable.
+                        2 | 116 => {
+                            // Race where the data file was deleted right after
+                            // we read the docket, try again
+                            return Err(race_error.into());
+                        }
+                        _ => {
+                            return Err(
+                                HgError::IoError { error, context }.into()
+                            )
+                        }
+                    }
+                }
+                Err(e) => return Err(e.into()),
+            };
+            OwningDirstateMap::new_v2(contents, data_size, metadata)
         } else {
-            OwningDirstateMap::new_v2(Vec::new(), data_size, metadata)
+            match self
+                .hg_vfs()
+                .mmap_open(docket.data_filename())
+                .io_not_found_as_none()
+            {
+                Ok(Some(data_mmap)) => {
+                    OwningDirstateMap::new_v2(data_mmap, data_size, metadata)
+                }
+                Ok(None) => {
+                    // Race where the data file was deleted right after we
+                    // read the docket, try again
+                    return Err(race_error.into());
+                }
+                Err(e) => return Err(e.into()),
+            }
         }?;
 
         let write_mode_config = self
--- a/tests/test-dirstate-read-race.t	Tue Feb 28 23:35:52 2023 +0100
+++ b/tests/test-dirstate-read-race.t	Tue Feb 28 19:36:46 2023 +0100
@@ -196,8 +196,12 @@
   $ cat $TESTTMP/status-race-lock.log
 #else
   $ cat $TESTTMP/status-race-lock.out
+  A dir/n
+  A dir/o
+  R dir/nested/m
+  ? p
+  ? q
   $ cat $TESTTMP/status-race-lock.log
-  abort: dirstate-v2 parse error: not enough bytes on disk
 #endif
 #endif
 #else
@@ -305,8 +309,10 @@
   $ cat $TESTTMP/status-race-lock.log
 #else
   $ cat $TESTTMP/status-race-lock.out
+  ? dir/n
+  ? p
+  ? q
   $ cat $TESTTMP/status-race-lock.log
-  abort: dirstate-v2 parse error: not enough bytes on disk
 #endif
 #endif
 #else
@@ -441,8 +447,11 @@
   $ cat $TESTTMP/status-race-lock.log
 #else
   $ cat $TESTTMP/status-race-lock.out
+  A dir/o
+  ? dir/n
+  ? p
+  ? q
   $ cat $TESTTMP/status-race-lock.log
-  abort: dirstate-v2 parse error: not enough bytes on disk
 #endif
 #endif
 #else
@@ -543,8 +552,12 @@
   $ cat $TESTTMP/status-race-lock.log
 #else
   $ cat $TESTTMP/status-race-lock.out
+  A dir/o
+  R dir/nested/m
+  ? dir/n
+  ? p
+  ? q
   $ cat $TESTTMP/status-race-lock.log
-  abort: dirstate-v2 parse error: not enough bytes on disk
 #endif
 #endif
 #else