rust-index: implement faster retain heads using a vec instead of a hashset
authorRaphaël Gomès <rgomes@octobus.net>
Wed, 29 Nov 2023 10:04:41 -0500
changeset 51259 ed6683d4cb29
parent 51258 8b89f7cc953a
child 51260 c4f1a790bda8
rust-index: implement faster retain heads using a vec instead of a hashset This is the same optimization that the C index does, we're only catching up now because this showed up as slow in benchmarking.
rust/hg-core/src/dagops.rs
rust/hg-core/src/revlog/index.rs
--- a/rust/hg-core/src/dagops.rs	Thu Dec 14 11:52:05 2023 +0100
+++ b/rust/hg-core/src/dagops.rs	Wed Nov 29 10:04:41 2023 -0500
@@ -13,7 +13,7 @@
 //! - Similarly *relative roots* of a collection of `Revision`, we mean those
 //!   whose parents, if any, don't belong to the collection.
 use super::{Graph, GraphError, Revision, NULL_REVISION};
-use crate::ancestors::AncestorsIterator;
+use crate::{ancestors::AncestorsIterator, BaseRevision};
 use std::collections::{BTreeSet, HashSet};
 
 fn remove_parents<S: std::hash::BuildHasher>(
@@ -81,6 +81,32 @@
     Ok(())
 }
 
+/// Optimized version of `retain_heads` that expects an zeroed array of the size
+/// of the graph, to act as a faster but less space-efficient `HashSet`.
+///
+/// # Panics
+///
+/// Can panic if `not_heads` is shorten than the length of graph.
+pub fn retain_heads_fast(
+    graph: &impl Graph,
+    not_heads: &mut [bool],
+    filtered_revs: &HashSet<Revision>,
+) -> Result<(), GraphError> {
+    for idx in (0..not_heads.len()).rev() {
+        let rev = Revision(idx as BaseRevision);
+        if !not_heads[idx] && filtered_revs.contains(&rev) {
+            not_heads[idx] = true;
+            continue;
+        }
+        for parent in graph.parents(rev)?.iter() {
+            if *parent != NULL_REVISION {
+                not_heads[parent.0 as usize] = true;
+            }
+        }
+    }
+    Ok(())
+}
+
 /// Roots of `revs`, passed as a `HashSet`
 ///
 /// They are returned in arbitrary order
--- a/rust/hg-core/src/revlog/index.rs	Thu Dec 14 11:52:05 2023 +0100
+++ b/rust/hg-core/src/revlog/index.rs	Wed Nov 29 10:04:41 2023 -0500
@@ -1,4 +1,3 @@
-use std::collections::hash_map::RandomState;
 use std::collections::{HashMap, HashSet};
 use std::fmt::Debug;
 use std::ops::Deref;
@@ -565,32 +564,24 @@
                 return Ok(self_head_revs.to_owned());
             }
         }
-        let mut revs: HashSet<Revision, RandomState> =
-            if filtered_revs.is_empty() {
-                (0..self.len())
-                    .into_iter()
-                    .map(|i| Revision(i as BaseRevision))
-                    .collect()
-            } else {
-                (0..self.len())
-                    .into_iter()
-                    .filter_map(|i| {
-                        let r = Revision(i as BaseRevision);
-                        if filtered_revs.contains(&r) {
-                            None
-                        } else {
-                            Some(r)
-                        }
-                    })
-                    .collect()
-            };
-        dagops::retain_heads(self, &mut revs)?;
-        if self.is_empty() {
-            revs.insert(NULL_REVISION);
-        }
-        let mut as_vec: Vec<Revision> =
-            revs.into_iter().map(Into::into).collect();
-        as_vec.sort_unstable();
+
+        let as_vec = if self.is_empty() {
+            vec![NULL_REVISION]
+        } else {
+            let mut not_heads = vec![false; self.len()];
+            dagops::retain_heads_fast(self, &mut not_heads, filtered_revs)?;
+            not_heads
+                .into_iter()
+                .enumerate()
+                .filter_map(|(idx, is_not_head)| {
+                    if is_not_head {
+                        None
+                    } else {
+                        Some(Revision(idx as BaseRevision))
+                    }
+                })
+                .collect()
+        };
         *self
             .head_revs
             .write()