dirstate-tree: Use HashMap instead of BTreeMap
authorSimon Sapin <simon.sapin@octobus.net>
Thu, 29 Apr 2021 11:32:57 +0200
changeset 47119 15395fd8ab28
parent 47118 c92e63762573
child 47120 7109a38830c9
dirstate-tree: Use HashMap instead of BTreeMap BTreeMap has the advantage of its "natural" iteration order being the one we need in the status algorithm. With HashMap, however, iteration order is undefined, so we need to allocate a Vec and sort it explicitly. Unfortunately, many BTreeMap operations are slower than in HashMap, and skipping that extra allocation and sort is not enough to compensate. Switching to HashMap + sort makes `hg status` 17% faster in one test case, as measured with hyperfine: ``` Benchmark #1: ../hg2/hg status -R $REPO --config=experimental.dirstate-tree.in-memory=1 Time (mean ± σ): 765.0 ms ± 8.8 ms [User: 1.352 s, System: 0.747 s] Range (min … max): 751.8 ms … 778.7 ms 10 runs Benchmark #2: ./hg status -R $REPO --config=experimental.dirstate-tree.in-memory=1 Time (mean ± σ): 651.8 ms ± 9.9 ms [User: 1.251 s, System: 0.799 s] Range (min … max): 642.2 ms … 671.8 ms 10 runs Summary './hg status -R $REPO --config=experimental.dirstate-tree.in-memory=1' ran 1.17 ± 0.02 times faster than '../hg2/hg status -R $REPO --config=experimental.dirstate-tree.in-memory=1' ``` * ./hg is this revision * ../hg2/hg is its parent * $REPO is an old snapshot of mozilla-central Differential Revision: https://phab.mercurial-scm.org/D10553
rust/hg-core/src/dirstate_tree/dirstate_map.rs
rust/hg-core/src/dirstate_tree/path_with_basename.rs
rust/hg-core/src/dirstate_tree/status.rs
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Tue Apr 27 17:49:38 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Thu Apr 29 11:32:57 2021 +0200
@@ -1,7 +1,7 @@
 use bytes_cast::BytesCast;
 use micro_timer::timed;
+use std::convert::TryInto;
 use std::path::PathBuf;
-use std::{collections::BTreeMap, convert::TryInto};
 
 use super::path_with_basename::WithBasename;
 use crate::dirstate::parsers::clear_ambiguous_mtime;
@@ -20,6 +20,7 @@
 use crate::DirstateParents;
 use crate::DirstateStatus;
 use crate::EntryState;
+use crate::FastHashMap;
 use crate::PatternFileWarning;
 use crate::StateMapIter;
 use crate::StatusError;
@@ -43,7 +44,7 @@
 /// path, so comparing full paths gives the same result as comparing base
 /// names. However `BTreeMap` would waste time always re-comparing the same
 /// string prefix.
-pub(super) type ChildNodes = BTreeMap<WithBasename<HgPathBuf>, Node>;
+pub(super) type ChildNodes = FastHashMap<WithBasename<HgPathBuf>, Node>;
 
 /// Represents a file or a directory
 #[derive(Default)]
@@ -86,7 +87,7 @@
         Self {
             parents: None,
             dirty_parents: false,
-            root: ChildNodes::new(),
+            root: ChildNodes::default(),
             nodes_with_entry_count: 0,
             nodes_with_copy_source_count: 0,
         }
--- a/rust/hg-core/src/dirstate_tree/path_with_basename.rs	Tue Apr 27 17:49:38 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/path_with_basename.rs	Thu Apr 29 11:32:57 2021 +0200
@@ -55,6 +55,12 @@
     }
 }
 
+impl<T: AsRef<HgPath>> std::hash::Hash for WithBasename<T> {
+    fn hash<H: std::hash::Hasher>(&self, hasher: &mut H) {
+        self.base_name().hash(hasher)
+    }
+}
+
 impl<T: AsRef<HgPath> + PartialEq> PartialEq for WithBasename<T> {
     fn eq(&self, other: &Self) -> bool {
         self.base_name() == other.base_name()
--- a/rust/hg-core/src/dirstate_tree/status.rs	Tue Apr 27 17:49:38 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/status.rs	Thu Apr 29 11:32:57 2021 +0200
@@ -110,11 +110,11 @@
 
         // `merge_join_by` requires both its input iterators to be sorted:
 
-        //
-        // * `BTreeMap` iterates according to keys’ ordering by definition
-
+        let mut dirstate_nodes: Vec<_> = dirstate_nodes.iter_mut().collect();
         // `sort_unstable_by_key` doesn’t allow keys borrowing from the value:
         // https://github.com/rust-lang/rust/issues/34162
+        dirstate_nodes
+            .sort_unstable_by(|(path1, _), (path2, _)| path1.cmp(path2));
         fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name));
 
         itertools::merge_join_by(