dirstate-tree: Skip readdir() in `hg status -mard`
author Simon Sapin <simon.sapin@octobus.net>
Wed, 19 May 2021 16:18:16 +0200
changeset 47338 f27f2afb15da
parent 47337 0654b3b3d2b5
child 47339 43f6a7bb4e12
dirstate-tree: Skip readdir() in `hg status -mard` When running the status algorithm in a mode where we don’t list unknown or ignored files, all we care about are files that are listed in the dirstate. We can therefore skip making expensive calls to readdir() to list the contents of filesystem directories, and instead only run stat() to get the filesystem state of files listed in the dirstate. (This state may be an error for files that don’t exist anymore on the filesystem.) On 16 CPU threads, this reduces the time spent in the `status()` function for `hg status -mard` on an old snapshot of mozilla-central from ~70ms to ~50ms. Differential Revision: https://phab.mercurial-scm.org/D10752
rust/hg-core/src/dirstate_tree/status.rs
--- a/rust/hg-core/src/dirstate_tree/status.rs	Wed May 19 13:15:00 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/status.rs	Wed May 19 16:18:16 2021 +0200
@@ -6,6 +6,7 @@
 use crate::matchers::get_ignore_function;
 use crate::matchers::Matcher;
 use crate::utils::files::get_bytes_from_os_string;
+use crate::utils::files::get_path_from_bytes;
 use crate::utils::hg_path::HgPath;
 use crate::BadMatch;
 use crate::DirstateStatus;
@@ -83,14 +84,17 @@
         fs_path: &Path,
         is_at_repo_root: bool,
     ) -> Result<Vec<DirEntry>, ()> {
-        DirEntry::read_dir(fs_path, is_at_repo_root).map_err(|error| {
-            let errno = error.raw_os_error().expect("expected real OS error");
-            self.outcome
-                .lock()
-                .unwrap()
-                .bad
-                .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
-        })
+        DirEntry::read_dir(fs_path, is_at_repo_root)
+            .map_err(|error| self.io_error(error, hg_path))
+    }
+
+    fn io_error(&self, error: std::io::Error, hg_path: &HgPath) {
+        let errno = error.raw_os_error().expect("expected real OS error");
+        self.outcome
+            .lock()
+            .unwrap()
+            .bad
+            .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
     }
 
     fn traverse_fs_directory_and_dirstate(
@@ -101,6 +105,35 @@
         directory_fs_path: &Path,
         is_at_repo_root: bool,
     ) -> Result<(), DirstateV2ParseError> {
+        if !self.options.list_unknown && !self.options.list_ignored {
+            // We only care about files in the dirstate, so we can skip listing
+            // filesystem directories entirely.
+            return dirstate_nodes
+                .par_iter()
+                .map(|dirstate_node| {
+                    let fs_path = directory_fs_path.join(get_path_from_bytes(
+                        dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
+                    ));
+                    match std::fs::symlink_metadata(&fs_path) {
+                        Ok(fs_metadata) => self.traverse_fs_and_dirstate(
+                            &fs_path,
+                            &fs_metadata,
+                            dirstate_node,
+                            has_ignored_ancestor,
+                        ),
+                        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
+                            self.traverse_dirstate_only(dirstate_node)
+                        }
+                        Err(error) => {
+                            let hg_path =
+                                dirstate_node.full_path(self.dmap.on_disk)?;
+                            Ok(self.io_error(error, hg_path))
+                        }
+                    }
+                })
+                .collect();
+        }
+
         let mut fs_entries = if let Ok(entries) = self.read_dir(
             directory_hg_path,
             directory_fs_path,
@@ -141,7 +174,8 @@
             match pair {
                 Both(dirstate_node, fs_entry) => self
                     .traverse_fs_and_dirstate(
-                        fs_entry,
+                        &fs_entry.full_path,
+                        &fs_entry.metadata,
                         dirstate_node,
                         has_ignored_ancestor,
                     ),
@@ -160,12 +194,13 @@
 
     fn traverse_fs_and_dirstate(
         &self,
-        fs_entry: &DirEntry,
+        fs_path: &Path,
+        fs_metadata: &std::fs::Metadata,
         dirstate_node: NodeRef<'tree, '_>,
         has_ignored_ancestor: bool,
     ) -> Result<(), DirstateV2ParseError> {
         let hg_path = dirstate_node.full_path(self.dmap.on_disk)?;
-        let file_type = fs_entry.metadata.file_type();
+        let file_type = fs_metadata.file_type();
         let file_or_symlink = file_type.is_file() || file_type.is_symlink();
         if !file_or_symlink {
             // If we previously had a file here, it was removed (with
@@ -186,7 +221,7 @@
                 is_ignored,
                 dirstate_node.children(self.dmap.on_disk)?,
                 hg_path,
-                &fs_entry.full_path,
+                fs_path,
                 is_at_repo_root,
             )?
         } else {
@@ -209,9 +244,8 @@
                             .unwrap()
                             .modified
                             .push(full_path),
-                        EntryState::Normal => {
-                            self.handle_normal_file(&dirstate_node, fs_entry)?
-                        }
+                        EntryState::Normal => self
+                            .handle_normal_file(&dirstate_node, fs_metadata)?,
                         // This variant is not used in DirstateMap
                         // nodes
                         EntryState::Unknown => unreachable!(),
@@ -239,7 +273,7 @@
     fn handle_normal_file(
         &self,
         dirstate_node: &NodeRef<'tree, '_>,
-        fs_entry: &DirEntry,
+        fs_metadata: &std::fs::Metadata,
     ) -> Result<(), DirstateV2ParseError> {
         // Keep the low 31 bits
         fn truncate_u64(value: u64) -> i32 {
@@ -253,13 +287,12 @@
             .entry()?
             .expect("handle_normal_file called with entry-less node");
         let full_path = Cow::from(dirstate_node.full_path(self.dmap.on_disk)?);
-        let mode_changed = || {
-            self.options.check_exec && entry.mode_changed(&fs_entry.metadata)
-        };
-        let size_changed = entry.size != truncate_u64(fs_entry.metadata.len());
+        let mode_changed =
+            || self.options.check_exec && entry.mode_changed(fs_metadata);
+        let size_changed = entry.size != truncate_u64(fs_metadata.len());
         if entry.size >= 0
             && size_changed
-            && fs_entry.metadata.file_type().is_symlink()
+            && fs_metadata.file_type().is_symlink()
         {
             // issue6456: Size returned may be longer due to encryption
             // on EXT-4 fscrypt. TODO maybe only do it on EXT4?
@@ -270,7 +303,7 @@
         {
             self.outcome.lock().unwrap().modified.push(full_path)
         } else {
-            let mtime = mtime_seconds(&fs_entry.metadata);
+            let mtime = mtime_seconds(fs_metadata);
             if truncate_i64(mtime) != entry.mtime
                 || mtime == self.options.last_normal_time
             {