rust-index: implement common_ancestors_heads() and ancestors()
authorGeorges Racinet <georges.racinet@octobus.net>
Wed, 18 Oct 2023 15:35:38 +0200
changeset 51225 89ce6a49bfeb
parent 51224 43241f31cf5b
child 51226 83091c14058c
rust-index: implement common_ancestors_heads() and ancestors() The only differences betwwen `common_ancestors_heads()` and `find_gca_candidates()` seems to be that: - the former accepts "overlapping" input revisions (meaning with duplicates). - limitation to 24 inputs (in the C code), that we translate to using the arbitrary size bit sets in the Rust code because we cannot bail to Python. Given that the input is expected to be small in most cases, we take the heavy handed approach of going through a HashSet and wait for perfomance assessment In case this is used via `hg-cpython`, we can anyway absorb the overhead by having `commonancestorheads` build a vector of unique values directly, and introduce a thin wrapper over `find_gca_candidates`, to take care of bit set type dispatching only. As far as `ancestors` is concerneed, this is just chaining `common_ancestors_heads()` with `find_deepest_revs`.
rust/hg-core/src/revlog/index.rs
rust/hg-cpython/src/revlog.rs
--- a/rust/hg-core/src/revlog/index.rs	Tue Oct 17 22:42:40 2023 +0200
+++ b/rust/hg-core/src/revlog/index.rs	Wed Oct 18 15:35:38 2023 +0200
@@ -1036,14 +1036,40 @@
 
     /// Given a (possibly overlapping) set of revs, return all the
     /// common ancestors heads: `heads(::args[0] and ::a[1] and ...)`
-    pub fn common_ancestor_heads(&self, _revisions: &[Revision]) {
-        todo!()
+    pub fn common_ancestor_heads(
+        &self,
+        revisions: &[Revision],
+    ) -> Result<Vec<Revision>, GraphError> {
+        // given that revisions is expected to be small, we find this shortcut
+        // potentially acceptable, especially given that `hg-cpython` could
+        // very much bypass this, constructing a vector of unique values from
+        // the onset.
+        let as_set: HashSet<Revision> = revisions.iter().copied().collect();
+        // Besides deduplicating, the C version also implements the shortcut
+        // for `NULL_REVISION`:
+        if as_set.contains(&NULL_REVISION) {
+            return Ok(vec![]);
+        }
+
+        let revisions: Vec<Revision> = as_set.into_iter().collect();
+
+        if revisions.len() <= 63 {
+            self.find_gca_candidates::<u64>(&revisions)
+        } else {
+            self.find_gca_candidates::<NonStaticPoisonableBitSet>(&revisions)
+        }
+    }
+
+    pub fn ancestors(
+        &self,
+        revisions: &[Revision],
+    ) -> Result<Vec<Revision>, GraphError> {
+        self.find_deepest_revs(&self.common_ancestor_heads(revisions)?)
     }
 
     /// Given a disjoint set of revs, return all candidates for the
     /// greatest common ancestor. In revset notation, this is the set
     /// `heads(::a and ::b and ...)`
-    #[allow(dead_code)]
     fn find_gca_candidates<BS: PoisonableBitSet + Clone>(
         &self,
         revs: &[Revision],
@@ -1147,7 +1173,6 @@
 
     /// Given a disjoint set of revs, return the subset with the longest path
     /// to the root.
-    #[allow(dead_code)]
     fn find_deepest_revs(
         &self,
         revs: &[Revision],
--- a/rust/hg-cpython/src/revlog.rs	Tue Oct 17 22:42:40 2023 +0200
+++ b/rust/hg-cpython/src/revlog.rs	Wed Oct 18 15:35:38 2023 +0200
@@ -196,12 +196,24 @@
 
     /// return the gca set of the given revs
     def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
-        self.call_cindex(py, "ancestors", args, kw)
+        let rust_res = self.inner_ancestors(py, args)?;
+
+        let c_res = self.call_cindex(py, "ancestors", args, kw)?;
+        // the algorithm should always provide the results in reverse ordering
+        assert_py_eq(py, "ancestors", &rust_res, &c_res)?;
+
+        Ok(rust_res)
     }
 
     /// return the heads of the common ancestors of the given revs
     def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
-        self.call_cindex(py, "commonancestorsheads", args, kw)
+        let rust_res = self.inner_commonancestorsheads(py, args)?;
+
+        let c_res = self.call_cindex(py, "commonancestorsheads", args, kw)?;
+        // the algorithm should always provide the results in reverse ordering
+        assert_py_eq(py, "commonancestorsheads", &rust_res, &c_res)?;
+
+        Ok(rust_res)
     }
 
     /// Clear the index caches and inner py_class data.
@@ -850,6 +862,38 @@
         Ok(PyList::new(py, &as_vec).into_object())
     }
 
+    fn inner_ancestors(
+        &self,
+        py: Python,
+        py_revs: &PyTuple,
+    ) -> PyResult<PyObject> {
+        let index = &mut *self.index(py).borrow_mut();
+        let revs: Vec<_> = rev_pyiter_collect(py, py_revs.as_object(), index)?;
+        let as_vec: Vec<_> = index
+            .ancestors(&revs)
+            .map_err(|e| graph_error(py, e))?
+            .iter()
+            .map(|r| PyRevision::from(*r).into_py_object(py).into_object())
+            .collect();
+        Ok(PyList::new(py, &as_vec).into_object())
+    }
+
+    fn inner_commonancestorsheads(
+        &self,
+        py: Python,
+        py_revs: &PyTuple,
+    ) -> PyResult<PyObject> {
+        let index = &mut *self.index(py).borrow_mut();
+        let revs: Vec<_> = rev_pyiter_collect(py, py_revs.as_object(), index)?;
+        let as_vec: Vec<_> = index
+            .common_ancestor_heads(&revs)
+            .map_err(|e| graph_error(py, e))?
+            .iter()
+            .map(|r| PyRevision::from(*r).into_py_object(py).into_object())
+            .collect();
+        Ok(PyList::new(py, &as_vec).into_object())
+    }
+
     fn inner_computephasesmapsets(
         &self,
         py: Python,