rust-index: optimize find_gca_candidates() on less than 8 revisions
authorGeorges Racinet <georges.racinet@octobus.net>
Fri, 20 Oct 2023 09:12:22 +0200
changeset 51229 1b23aaf5eb7b
parent 51228 61a6ef876efd
child 51230 f9a52a9603f9
rust-index: optimize find_gca_candidates() on less than 8 revisions This is expected to be by far the most common case, given that, e.g., merging involves using it on two revisions. Using a `u8` as support for the bitset obviously divides the amount of RAM needed by 8. To state the obvious, on a repository with 10 million changesets, this spares 70MB. It is also possible that it'd be slightly faster, because it is easier to allocate and provides better cache locality. It is possible that some exhaustive listing of the traits implemented by `u8` and `u64` would avoid the added duplication, but that can be done later and would need a replacement for the `MAX` consts.
rust/hg-core/src/revlog/index.rs
rust/hg-cpython/src/revlog.rs
--- a/rust/hg-core/src/revlog/index.rs	Fri Oct 20 08:54:49 2023 +0200
+++ b/rust/hg-core/src/revlog/index.rs	Fri Oct 20 09:12:22 2023 +0200
@@ -1053,7 +1053,9 @@
 
         let revisions: Vec<Revision> = as_set.into_iter().collect();
 
-        if revisions.len() <= 63 {
+        if revisions.len() < 8 {
+            self.find_gca_candidates::<u8>(&revisions)
+        } else if revisions.len() < 64 {
             self.find_gca_candidates::<u64>(&revisions)
         } else {
             self.find_gca_candidates::<NonStaticPoisonableBitSet>(&revisions)
@@ -1314,6 +1316,7 @@
 }
 
 const U64_POISON: u64 = 1 << 63;
+const U8_POISON: u8 = 1 << 7;
 
 impl PoisonableBitSet for u64 {
     fn vec_of_empty(_sets_size: usize, vec_len: usize) -> Vec<Self> {
@@ -1361,6 +1364,52 @@
     }
 }
 
+impl PoisonableBitSet for u8 {
+    fn vec_of_empty(_sets_size: usize, vec_len: usize) -> Vec<Self> {
+        vec![0; vec_len]
+    }
+
+    fn size(&self) -> usize {
+        1
+    }
+
+    fn capacity(&self) -> usize {
+        7
+    }
+
+    fn add(&mut self, n: usize) {
+        (*self) |= 1 << n;
+    }
+
+    fn discard(&mut self, n: usize) {
+        (*self) &= u8::MAX - (1 << n);
+    }
+
+    fn union(&mut self, other: &Self) {
+        if *self != *other {
+            (*self) |= *other;
+        }
+    }
+
+    fn is_full_range(&self, n: usize) -> bool {
+        *self + 1 == (1 << n)
+    }
+
+    fn is_empty(&self) -> bool {
+        *self == 0
+    }
+
+    fn poison(&mut self) {
+        *self = U8_POISON;
+    }
+
+    fn is_poisoned(&self) -> bool {
+        // equality comparison would be tempting but would not resist
+        // operations after poisoning (even if these should be bogus).
+        *self >= U8_POISON
+    }
+}
+
 /// A poisonable bit set whose capacity is not known at compile time but
 /// is constant after initial construction
 ///
--- a/rust/hg-cpython/src/revlog.rs	Fri Oct 20 08:54:49 2023 +0200
+++ b/rust/hg-cpython/src/revlog.rs	Fri Oct 20 09:12:22 2023 +0200
@@ -20,7 +20,10 @@
 };
 use hg::{
     errors::HgError,
-    index::{IndexHeader, Phase, RevisionDataParams, SnapshotsCache},
+    index::{
+        IndexHeader, Phase, RevisionDataParams, SnapshotsCache,
+        INDEX_ENTRY_SIZE,
+    },
     nodemap::{Block, NodeMapError, NodeTree},
     revlog::{nodemap::NodeMap, NodePrefix, RevlogError, RevlogIndex},
     BaseRevision, Revision, UncheckedRevision, NULL_REVISION,
@@ -483,12 +486,26 @@
 
     @property
     def entry_size(&self) -> PyResult<PyInt> {
-        self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py)
+        let rust_res: PyInt = INDEX_ENTRY_SIZE.to_py_object(py);
+
+        let c_res = self.cindex(py).borrow().inner()
+            .getattr(py, "entry_size")?;
+        assert_py_eq(py, "entry_size", rust_res.as_object(), &c_res)?;
+
+        Ok(rust_res)
     }
 
     @property
     def rust_ext_compat(&self) -> PyResult<PyInt> {
-        self.cindex(py).borrow().inner().getattr(py, "rust_ext_compat")?.extract::<PyInt>(py)
+        // will be entirely removed when the Rust index yet useful to
+        // implement in Rust to detangle things when removing `self.cindex`
+        let rust_res: PyInt = 1.to_py_object(py);
+
+        let c_res = self.cindex(py).borrow().inner()
+            .getattr(py, "rust_ext_compat")?;
+        assert_py_eq(py, "rust_ext_compat", rust_res.as_object(), &c_res)?;
+
+        Ok(rust_res)
     }
 
 });
@@ -671,7 +688,7 @@
         let rust_index_len = self.index(py).borrow().len();
         let cindex_len = self.cindex(py).borrow().inner().len(py)?;
         assert_eq!(rust_index_len, cindex_len);
-        Ok(cindex_len)
+        Ok(rust_index_len)
     }
 
     /// This is scaffolding at this point, but it could also become