# HG changeset patch # User Simon Sapin # Date 1644331912 -3600 # Node ID 11c0411bf4e2086a640c3597fd770e088cc4b346 # Parent 469b9ee336a665e2999c2fd1b0f4ea9fd0134ae5 dirstate-tree: optimize HashMap lookups with raw_entry_mut This switches to using `HashMap` from the hashbrown crate, in order to use its `raw_entry_mut` method. The standard library’s `HashMap` is also based on this same crate, but `raw_entry_mut` is not yet stable there: https://github.com/rust-lang/rust/issues/56167 Using version 0.9 because 0.10 is yanked and 0.11 requires Rust 1.49 This replaces in `DirstateMap::get_or_insert_node` a call to `HashMap::entry` with `K = WithBasename>`. `entry` takes and consumes an "owned" `key: K` parameter, in case a new entry ends up inserted. This key is converted by `to_cow` from a value that borrows the `'path` lifetime. When this function is called by `Dirstate::new_v1`, `'path` is in fact the same as `'on_disk` so `to_cow` can return an owned key that contains `Cow::Borrowed`. For other callers, `to_cow` needs to create a `Cow::Owned` and thus make a costly heap memory allocation. This is wasteful if this key was already present in the map. Even when inserting a new node this is typically the case for its ancestor nodes (assuming most directories have numerous descendants). Differential Revision: https://phab.mercurial-scm.org/D12317 diff -r 469b9ee336a6 -r 11c0411bf4e2 rust/Cargo.lock --- a/rust/Cargo.lock Fri Mar 04 13:33:55 2022 +0100 +++ b/rust/Cargo.lock Tue Feb 08 15:51:52 2022 +0100 @@ -9,6 +9,12 @@ checksum = "ee2a4ec343196209d6594e19543ae87a39f96d5534d7174822a3ad825dd6ed7e" [[package]] +name = "ahash" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "739f4a8db6605981345c5654f3a85b056ce52f37a39d34da03f25bf2151ea16e" + +[[package]] name = "aho-corasick" version = "0.7.15" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -372,6 +378,16 @@ checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" [[package]] +name = "hashbrown" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04" +dependencies = [ + "ahash", + "rayon", +] + +[[package]] name = "hermit-abi" version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -398,6 +414,7 @@ "derive_more", "flate2", "format-bytes", + "hashbrown", "home", "im-rc", "itertools", diff -r 469b9ee336a6 -r 11c0411bf4e2 rust/hg-core/Cargo.toml --- a/rust/hg-core/Cargo.toml Fri Mar 04 13:33:55 2022 +0100 +++ b/rust/hg-core/Cargo.toml Tue Feb 08 15:51:52 2022 +0100 @@ -13,6 +13,7 @@ bytes-cast = "0.2" byteorder = "1.3.4" derive_more = "0.99" +hashbrown = {version = "0.9.1", features = ["rayon"]} home = "0.5" im-rc = "15.0.*" itertools = "0.9" diff -r 469b9ee336a6 -r 11c0411bf4e2 rust/hg-core/src/dirstate_tree/dirstate_map.rs --- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs Fri Mar 04 13:33:55 2022 +0100 +++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs Tue Feb 08 15:51:52 2022 +0100 @@ -22,7 +22,7 @@ use crate::DirstateParents; use crate::DirstateStatus; use crate::EntryState; -use crate::FastHashMap; +use crate::FastHashbrownMap as FastHashMap; use crate::PatternFileWarning; use crate::StatusError; use crate::StatusOptions; @@ -585,13 +585,11 @@ .next() .expect("expected at least one inclusive ancestor"); loop { - // TODO: can we avoid allocating an owned key in cases where the - // map already contains that key, without introducing double - // lookup? - let child_node = child_nodes + let (_, child_node) = child_nodes .make_mut(on_disk, unreachable_bytes)? - .entry(to_cow(ancestor_path)) - .or_default(); + .raw_entry_mut() + .from_key(ancestor_path.base_name()) + .or_insert_with(|| (to_cow(ancestor_path), Node::default())); if let Some(next) = inclusive_ancestor_paths.next() { each_ancestor(child_node); ancestor_path = next; diff -r 469b9ee336a6 -r 11c0411bf4e2 rust/hg-core/src/lib.rs --- a/rust/hg-core/src/lib.rs Fri Mar 04 13:33:55 2022 +0100 +++ b/rust/hg-core/src/lib.rs Tue Feb 08 15:51:52 2022 +0100 @@ -56,6 +56,11 @@ /// write access to your repository, you have other issues. pub type FastHashMap = HashMap; +// TODO: should this be the default `FastHashMap` for all of hg-core, not just +// dirstate_tree? How does XxHash compare with AHash, hashbrown’s default? +pub type FastHashbrownMap = + hashbrown::HashMap; + #[derive(Debug, PartialEq)] pub enum DirstateMapError { PathNotFound(HgPathBuf),