dirstate-tree: optimize HashMap lookups with raw_entry_mut
author: Simon Sapin <simon.sapin@octobus.net>
Tue, 08 Feb 2022 15:51:52 +0100
changeset 48950 11c0411bf4e2
parent 48949 469b9ee336a6
child 48951 cf99c4af1079
dirstate-tree: optimize HashMap lookups with raw_entry_mut

This switches to using `HashMap` from the hashbrown crate, in order to use its `raw_entry_mut` method. The standard library’s `HashMap` is also based on this same crate, but `raw_entry_mut` is not yet stable there: https://github.com/rust-lang/rust/issues/56167

Using version 0.9 because 0.10 is yanked and 0.11 requires Rust 1.49.

In `DirstateMap::get_or_insert_node`, this replaces a call to `HashMap<K, V>::entry`, where `K = WithBasename<Cow<'on_disk, HgPath>>`. `entry` takes and consumes an "owned" `key: K` parameter, in case a new entry ends up being inserted. This key is converted by `to_cow` from a value that borrows the `'path` lifetime.

When this function is called by `Dirstate::new_v1`, `'path` is in fact the same as `'on_disk`, so `to_cow` can return an owned key that contains `Cow::Borrowed`. For other callers, `to_cow` needs to create a `Cow::Owned` and thus make a costly heap memory allocation. This is wasteful if the key was already present in the map. Even when inserting a new node, this is typically the case for its ancestor nodes (assuming most directories have numerous descendants).

Differential Revision: https://phab.mercurial-scm.org/D12317
rust/Cargo.lock
rust/hg-core/Cargo.toml
rust/hg-core/src/dirstate_tree/dirstate_map.rs
rust/hg-core/src/lib.rs
--- a/rust/Cargo.lock	Fri Mar 04 13:33:55 2022 +0100
+++ b/rust/Cargo.lock	Tue Feb 08 15:51:52 2022 +0100
@@ -9,6 +9,12 @@
 checksum = "ee2a4ec343196209d6594e19543ae87a39f96d5534d7174822a3ad825dd6ed7e"
 
 [[package]]
+name = "ahash"
+version = "0.4.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "739f4a8db6605981345c5654f3a85b056ce52f37a39d34da03f25bf2151ea16e"
+
+[[package]]
 name = "aho-corasick"
 version = "0.7.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -372,6 +378,16 @@
 checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
 
 [[package]]
+name = "hashbrown"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04"
+dependencies = [
+ "ahash",
+ "rayon",
+]
+
+[[package]]
 name = "hermit-abi"
 version = "0.1.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -398,6 +414,7 @@
  "derive_more",
  "flate2",
  "format-bytes",
+ "hashbrown",
  "home",
  "im-rc",
  "itertools",
--- a/rust/hg-core/Cargo.toml	Fri Mar 04 13:33:55 2022 +0100
+++ b/rust/hg-core/Cargo.toml	Tue Feb 08 15:51:52 2022 +0100
@@ -13,6 +13,7 @@
 bytes-cast = "0.2"
 byteorder = "1.3.4"
 derive_more = "0.99"
+hashbrown = {version = "0.9.1", features = ["rayon"]}
 home = "0.5"
 im-rc = "15.0.*"
 itertools = "0.9"
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Fri Mar 04 13:33:55 2022 +0100
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Tue Feb 08 15:51:52 2022 +0100
@@ -22,7 +22,7 @@
 use crate::DirstateParents;
 use crate::DirstateStatus;
 use crate::EntryState;
-use crate::FastHashMap;
+use crate::FastHashbrownMap as FastHashMap;
 use crate::PatternFileWarning;
 use crate::StatusError;
 use crate::StatusOptions;
@@ -585,13 +585,11 @@
             .next()
             .expect("expected at least one inclusive ancestor");
         loop {
-            // TODO: can we avoid allocating an owned key in cases where the
-            // map already contains that key, without introducing double
-            // lookup?
-            let child_node = child_nodes
+            let (_, child_node) = child_nodes
                 .make_mut(on_disk, unreachable_bytes)?
-                .entry(to_cow(ancestor_path))
-                .or_default();
+                .raw_entry_mut()
+                .from_key(ancestor_path.base_name())
+                .or_insert_with(|| (to_cow(ancestor_path), Node::default()));
             if let Some(next) = inclusive_ancestor_paths.next() {
                 each_ancestor(child_node);
                 ancestor_path = next;
--- a/rust/hg-core/src/lib.rs	Fri Mar 04 13:33:55 2022 +0100
+++ b/rust/hg-core/src/lib.rs	Tue Feb 08 15:51:52 2022 +0100
@@ -56,6 +56,11 @@
 /// write access to your repository, you have other issues.
 pub type FastHashMap<K, V> = HashMap<K, V, RandomXxHashBuilder64>;
 
+// TODO: should this be the default `FastHashMap` for all of hg-core, not just
+// dirstate_tree? How does XxHash compare with AHash, hashbrown’s default?
+pub type FastHashbrownMap<K, V> =
+    hashbrown::HashMap<K, V, RandomXxHashBuilder64>;
+
 #[derive(Debug, PartialEq)]
 pub enum DirstateMapError {
     PathNotFound(HgPathBuf),