dirstate-tree: Change status() results to not borrow DirstateMap
authorSimon Sapin <simon.sapin@octobus.net>
Fri, 28 May 2021 20:07:27 +0200
changeset 47347 73ddcedeaadf
parent 47346 5e12b6bfdd3e
child 47348 a4de570e61fa
dirstate-tree: Change status() results to not borrow DirstateMap The `status` function takes a `&'tree mut DirstateMap<'on_disk>` parameter. `'on_disk` borrows a read-only byte buffer with the contents of the `.hg/dirstate` file. `DirstateMap` internally uses represents file paths as `std::borrow::Cow<'on_disk, HgPath>`, which borrows the byte buffer when possible and allocates an owned string if not, such as for files added to the dirstate after it was loaded from disk. Previously the return type of of `status` has a `'tree` lifetime, meaning it could borrow all paths from the `DirstateMap`. With this changeset, that lifetime is changed to `'on_disk` meaning that only paths from the byte buffer can be borrowed, and paths allocated by `DirstateMap` must be copied. Usually most paths are in the byte buffer, and most paths are not part of the return value of `status`, so the number of extra copies should be small. This change will enable `status` to mutate the `DirstateMap` after it has finished constructing its return value. Previously such mutation would be prevented by possible on-going borrows. Differential Revision: https://phab.mercurial-scm.org/D10824
rust/hg-core/src/dirstate/status.rs
rust/hg-core/src/dirstate_tree/dirstate_map.rs
rust/hg-core/src/dirstate_tree/status.rs
rust/hg-core/src/operations/dirstate_status.rs
--- a/rust/hg-core/src/dirstate/status.rs	Fri May 28 12:16:14 2021 +0200
+++ b/rust/hg-core/src/dirstate/status.rs	Fri May 28 20:07:27 2021 +0200
@@ -292,7 +292,7 @@
     pub unsure: Vec<HgPathCow<'a>>,
 
     /// Only filled if `collect_traversed_dirs` is `true`
-    pub traversed: Vec<HgPathBuf>,
+    pub traversed: Vec<HgPathCow<'a>>,
 }
 
 #[derive(Debug, derive_more::From)]
@@ -880,7 +880,7 @@
 #[timed]
 pub fn build_response<'a>(
     results: impl IntoIterator<Item = DispatchedPath<'a>>,
-    traversed: Vec<HgPathBuf>,
+    traversed: Vec<HgPathCow<'a>>,
 ) -> DirstateStatus<'a> {
     let mut unsure = vec![];
     let mut modified = vec![];
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Fri May 28 12:16:14 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Fri May 28 20:07:27 2021 +0200
@@ -46,6 +46,13 @@
 /// string prefix.
 pub(super) type NodeKey<'on_disk> = WithBasename<Cow<'on_disk, HgPath>>;
 
+/// Similar to `&'tree Cow<'on_disk, HgPath>`, but can also be returned
+/// for on-disk nodes that don’t actually have a `Cow` to borrow.
+pub(super) enum BorrowedPath<'tree, 'on_disk> {
+    InMemory(&'tree HgPathBuf),
+    OnDisk(&'on_disk HgPath),
+}
+
 pub(super) enum ChildNodes<'on_disk> {
     InMemory(FastHashMap<NodeKey<'on_disk>, Node<'on_disk>>),
     OnDisk(&'on_disk [on_disk::Node]),
@@ -61,6 +68,26 @@
     OnDisk(&'on_disk on_disk::Node),
 }
 
+impl<'tree, 'on_disk> BorrowedPath<'tree, 'on_disk> {
+    pub fn detach_from_tree(&self) -> Cow<'on_disk, HgPath> {
+        match *self {
+            BorrowedPath::InMemory(in_memory) => Cow::Owned(in_memory.clone()),
+            BorrowedPath::OnDisk(on_disk) => Cow::Borrowed(on_disk),
+        }
+    }
+}
+
+impl<'tree, 'on_disk> std::ops::Deref for BorrowedPath<'tree, 'on_disk> {
+    type Target = HgPath;
+
+    fn deref(&self) -> &HgPath {
+        match *self {
+            BorrowedPath::InMemory(in_memory) => in_memory,
+            BorrowedPath::OnDisk(on_disk) => on_disk,
+        }
+    }
+}
+
 impl Default for ChildNodes<'_> {
     fn default() -> Self {
         ChildNodes::InMemory(Default::default())
@@ -210,15 +237,19 @@
         }
     }
 
-    /// Returns a `Cow` that can borrow 'on_disk but is detached from 'tree
-    pub(super) fn full_path_cow(
+    /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk,
+    /// HgPath>` detached from `'tree`
+    pub(super) fn full_path_borrowed(
         &self,
         on_disk: &'on_disk [u8],
-    ) -> Result<Cow<'on_disk, HgPath>, DirstateV2ParseError> {
+    ) -> Result<BorrowedPath<'tree, 'on_disk>, DirstateV2ParseError> {
         match self {
-            NodeRef::InMemory(path, _node) => Ok(path.full_path().clone()),
+            NodeRef::InMemory(path, _node) => match path.full_path() {
+                Cow::Borrowed(on_disk) => Ok(BorrowedPath::OnDisk(on_disk)),
+                Cow::Owned(in_memory) => Ok(BorrowedPath::InMemory(in_memory)),
+            },
             NodeRef::OnDisk(node) => {
-                Ok(Cow::Borrowed(node.full_path(on_disk)?))
+                Ok(BorrowedPath::OnDisk(node.full_path(on_disk)?))
             }
         }
     }
@@ -819,7 +850,10 @@
                     node.copy_source(self.on_disk)?,
                 );
                 if entry.mtime_is_ambiguous(now) {
-                    ambiguous_mtimes.push(node.full_path_cow(self.on_disk)?)
+                    ambiguous_mtimes.push(
+                        node.full_path_borrowed(self.on_disk)?
+                            .detach_from_tree(),
+                    )
                 }
             }
         }
@@ -855,7 +889,10 @@
             let node = node?;
             if let Some(entry) = node.entry()? {
                 if entry.mtime_is_ambiguous(now) {
-                    paths.push(node.full_path_cow(self.on_disk)?)
+                    paths.push(
+                        node.full_path_borrowed(self.on_disk)?
+                            .detach_from_tree(),
+                    )
                 }
             }
         }
--- a/rust/hg-core/src/dirstate_tree/status.rs	Fri May 28 12:16:14 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/status.rs	Fri May 28 20:07:27 2021 +0200
@@ -1,4 +1,5 @@
 use crate::dirstate::status::IgnoreFnType;
+use crate::dirstate_tree::dirstate_map::BorrowedPath;
 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
 use crate::dirstate_tree::dirstate_map::DirstateMap;
 use crate::dirstate_tree::dirstate_map::NodeRef;
@@ -17,7 +18,6 @@
 use crate::StatusOptions;
 use micro_timer::timed;
 use rayon::prelude::*;
-use std::borrow::Cow;
 use std::io;
 use std::path::Path;
 use std::path::PathBuf;
@@ -39,7 +39,7 @@
     root_dir: PathBuf,
     ignore_files: Vec<PathBuf>,
     options: StatusOptions,
-) -> Result<(DirstateStatus<'tree>, Vec<PatternFileWarning>), StatusError> {
+) -> Result<(DirstateStatus<'on_disk>, Vec<PatternFileWarning>), StatusError> {
     let (ignore_fn, warnings): (IgnoreFnType, _) =
         if options.list_ignored || options.list_unknown {
             get_ignore_function(ignore_files, &root_dir)?
@@ -55,7 +55,7 @@
         outcome: Mutex::new(DirstateStatus::default()),
     };
     let is_at_repo_root = true;
-    let hg_path = HgPath::new("");
+    let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
     let has_ignored_ancestor = false;
     common.traverse_fs_directory_and_dirstate(
         has_ignored_ancestor,
@@ -69,15 +69,15 @@
 
 /// Bag of random things needed by various parts of the algorithm. Reduces the
 /// number of parameters passed to functions.
-struct StatusCommon<'tree, 'a, 'on_disk: 'tree> {
+struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
     dmap: &'tree DirstateMap<'on_disk>,
     options: StatusOptions,
     matcher: &'a (dyn Matcher + Sync),
     ignore_fn: IgnoreFnType<'a>,
-    outcome: Mutex<DirstateStatus<'tree>>,
+    outcome: Mutex<DirstateStatus<'on_disk>>,
 }
 
-impl<'tree, 'a> StatusCommon<'tree, 'a, '_> {
+impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
     fn read_dir(
         &self,
         hg_path: &HgPath,
@@ -100,8 +100,8 @@
     fn traverse_fs_directory_and_dirstate(
         &self,
         has_ignored_ancestor: bool,
-        dirstate_nodes: ChildNodesRef<'tree, '_>,
-        directory_hg_path: &'tree HgPath,
+        dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
+        directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
         directory_fs_path: &Path,
         is_at_repo_root: bool,
     ) -> Result<(), DirstateV2ParseError> {
@@ -199,10 +199,10 @@
         &self,
         fs_path: &Path,
         fs_metadata: &std::fs::Metadata,
-        dirstate_node: NodeRef<'tree, '_>,
+        dirstate_node: NodeRef<'tree, 'on_disk>,
         has_ignored_ancestor: bool,
     ) -> Result<(), DirstateV2ParseError> {
-        let hg_path = dirstate_node.full_path(self.dmap.on_disk)?;
+        let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
         let file_type = fs_metadata.file_type();
         let file_or_symlink = file_type.is_file() || file_type.is_symlink();
         if !file_or_symlink {
@@ -210,13 +210,17 @@
             // `hg rm` or similar) or deleted before it could be
             // replaced by a directory or something else.
             self.mark_removed_or_deleted_if_file(
-                hg_path,
+                &hg_path,
                 dirstate_node.state()?,
             );
         }
         if file_type.is_dir() {
             if self.options.collect_traversed_dirs {
-                self.outcome.lock().unwrap().traversed.push(hg_path.into())
+                self.outcome
+                    .lock()
+                    .unwrap()
+                    .traversed
+                    .push(hg_path.detach_from_tree())
             }
             let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
             let is_at_repo_root = false;
@@ -229,24 +233,26 @@
             )?
         } else {
             if file_or_symlink && self.matcher.matches(hg_path) {
-                let full_path = Cow::from(hg_path);
                 if let Some(state) = dirstate_node.state()? {
                     match state {
-                        EntryState::Added => {
-                            self.outcome.lock().unwrap().added.push(full_path)
-                        }
+                        EntryState::Added => self
+                            .outcome
+                            .lock()
+                            .unwrap()
+                            .added
+                            .push(hg_path.detach_from_tree()),
                         EntryState::Removed => self
                             .outcome
                             .lock()
                             .unwrap()
                             .removed
-                            .push(full_path),
+                            .push(hg_path.detach_from_tree()),
                         EntryState::Merged => self
                             .outcome
                             .lock()
                             .unwrap()
                             .modified
-                            .push(full_path),
+                            .push(hg_path.detach_from_tree()),
                         EntryState::Normal => self
                             .handle_normal_file(&dirstate_node, fs_metadata)?,
                         // This variant is not used in DirstateMap
@@ -256,10 +262,7 @@
                 } else {
                     // `node.entry.is_none()` indicates a "directory"
                     // node, but the filesystem has a file
-                    self.mark_unknown_or_ignored(
-                        has_ignored_ancestor,
-                        full_path,
-                    )
+                    self.mark_unknown_or_ignored(has_ignored_ancestor, hg_path)
                 }
             }
 
@@ -275,7 +278,7 @@
     /// filesystem
     fn handle_normal_file(
         &self,
-        dirstate_node: &NodeRef<'tree, '_>,
+        dirstate_node: &NodeRef<'tree, 'on_disk>,
         fs_metadata: &std::fs::Metadata,
     ) -> Result<(), DirstateV2ParseError> {
         // Keep the low 31 bits
@@ -289,7 +292,7 @@
         let entry = dirstate_node
             .entry()?
             .expect("handle_normal_file called with entry-less node");
-        let full_path = Cow::from(dirstate_node.full_path(self.dmap.on_disk)?);
+        let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
         let mode_changed =
             || self.options.check_exec && entry.mode_changed(fs_metadata);
         let size_changed = entry.size != truncate_u64(fs_metadata.len());
@@ -299,20 +302,36 @@
         {
             // issue6456: Size returned may be longer due to encryption
             // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
-            self.outcome.lock().unwrap().unsure.push(full_path)
+            self.outcome
+                .lock()
+                .unwrap()
+                .unsure
+                .push(hg_path.detach_from_tree())
         } else if dirstate_node.has_copy_source()
             || entry.is_from_other_parent()
             || (entry.size >= 0 && (size_changed || mode_changed()))
         {
-            self.outcome.lock().unwrap().modified.push(full_path)
+            self.outcome
+                .lock()
+                .unwrap()
+                .modified
+                .push(hg_path.detach_from_tree())
         } else {
             let mtime = mtime_seconds(fs_metadata);
             if truncate_i64(mtime) != entry.mtime
                 || mtime == self.options.last_normal_time
             {
-                self.outcome.lock().unwrap().unsure.push(full_path)
+                self.outcome
+                    .lock()
+                    .unwrap()
+                    .unsure
+                    .push(hg_path.detach_from_tree())
             } else if self.options.list_clean {
-                self.outcome.lock().unwrap().clean.push(full_path)
+                self.outcome
+                    .lock()
+                    .unwrap()
+                    .clean
+                    .push(hg_path.detach_from_tree())
             }
         }
         Ok(())
@@ -321,10 +340,10 @@
     /// A node in the dirstate tree has no corresponding filesystem entry
     fn traverse_dirstate_only(
         &self,
-        dirstate_node: NodeRef<'tree, '_>,
+        dirstate_node: NodeRef<'tree, 'on_disk>,
     ) -> Result<(), DirstateV2ParseError> {
         self.mark_removed_or_deleted_if_file(
-            dirstate_node.full_path(self.dmap.on_disk)?,
+            &dirstate_node.full_path_borrowed(self.dmap.on_disk)?,
             dirstate_node.state()?,
         );
         dirstate_node
@@ -340,15 +359,23 @@
     /// Does nothing on a "directory" node
     fn mark_removed_or_deleted_if_file(
         &self,
-        hg_path: &'tree HgPath,
+        hg_path: &BorrowedPath<'tree, 'on_disk>,
         dirstate_node_state: Option<EntryState>,
     ) {
         if let Some(state) = dirstate_node_state {
             if self.matcher.matches(hg_path) {
                 if let EntryState::Removed = state {
-                    self.outcome.lock().unwrap().removed.push(hg_path.into())
+                    self.outcome
+                        .lock()
+                        .unwrap()
+                        .removed
+                        .push(hg_path.detach_from_tree())
                 } else {
-                    self.outcome.lock().unwrap().deleted.push(hg_path.into())
+                    self.outcome
+                        .lock()
+                        .unwrap()
+                        .deleted
+                        .push(hg_path.detach_from_tree())
                 }
             }
         }
@@ -395,23 +422,34 @@
                 self.outcome.lock().unwrap().traversed.push(hg_path.into())
             }
         } else if file_or_symlink && self.matcher.matches(&hg_path) {
-            self.mark_unknown_or_ignored(has_ignored_ancestor, hg_path.into())
+            self.mark_unknown_or_ignored(
+                has_ignored_ancestor,
+                &BorrowedPath::InMemory(&hg_path),
+            )
         }
     }
 
     fn mark_unknown_or_ignored(
         &self,
         has_ignored_ancestor: bool,
-        hg_path: Cow<'tree, HgPath>,
+        hg_path: &BorrowedPath<'_, 'on_disk>,
     ) {
         let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path);
         if is_ignored {
             if self.options.list_ignored {
-                self.outcome.lock().unwrap().ignored.push(hg_path)
+                self.outcome
+                    .lock()
+                    .unwrap()
+                    .ignored
+                    .push(hg_path.detach_from_tree())
             }
         } else {
             if self.options.list_unknown {
-                self.outcome.lock().unwrap().unknown.push(hg_path)
+                self.outcome
+                    .lock()
+                    .unwrap()
+                    .unknown
+                    .push(hg_path.detach_from_tree())
             }
         }
     }
--- a/rust/hg-core/src/operations/dirstate_status.rs	Fri May 28 12:16:14 2021 +0200
+++ b/rust/hg-core/src/operations/dirstate_status.rs	Fri May 28 20:07:27 2021 +0200
@@ -61,7 +61,10 @@
         }
 
         drop(traversed_sender);
-        let traversed = traversed_receiver.into_iter().collect();
+        let traversed = traversed_receiver
+            .into_iter()
+            .map(std::borrow::Cow::Owned)
+            .collect();
 
         Ok(build_response(results, traversed))
     }