revlog: add a Rust implementation of `headrevsdiff`
authorArseniy Alekseyev <aalekseyev@janestreet.com>
Mon, 12 Feb 2024 20:01:27 +0000
changeset 51397 b01e7d97e167
parent 51396 3f37d80d3ab4
child 51398 e9304c39e075
revlog: add a Rust implementation of `headrevsdiff` Python implementation of `headrevsdiff` can be very slow in the worst case compared with the `heads` computation it replaces, since the latter is done in Rust. Even the average case of this Python implementation is still noticeable in the profiles. This patch makes the computation much much faster by doing it in Rust.
rust/hg-core/src/revlog/index.rs
rust/hg-cpython/src/revlog.rs
--- a/rust/hg-core/src/revlog/index.rs	Thu Dec 21 20:30:03 2023 +0000
+++ b/rust/hg-core/src/revlog/index.rs	Mon Feb 12 20:01:27 2024 +0000
@@ -556,6 +556,57 @@
         self.head_revs_filtered(&HashSet::new(), true)
     }
 
+    /// Return the heads removed and added by advancing from `begin` to `end`.
+    /// In revset language, we compute:
+    /// - `heads(:begin)-heads(:end)`
+    /// - `heads(:end)-heads(:begin)`
+    pub fn head_revs_diff(
+        &self,
+        begin: Revision,
+        end: Revision,
+    ) -> Result<(Vec<Revision>, Vec<Revision>), GraphError> {
+        let mut heads_added = vec![];
+        let mut heads_removed = vec![];
+
+        let mut acc = HashSet::new();
+        let Revision(begin) = begin;
+        let Revision(end) = end;
+        let mut i = end;
+
+        while i > begin {
+            // acc invariant:
+            // `j` is in the set iff `j <= i` and it has children
+            // among `i+1..end` (inclusive)
+            if !acc.remove(&i) {
+                heads_added.push(Revision(i));
+            }
+            for Revision(parent) in self.parents(Revision(i))? {
+                acc.insert(parent);
+            }
+            i -= 1;
+        }
+
+        // At this point `acc` contains old revisions that gained new children.
+        // We need to check if they had any children before. If not, those
+        // revisions are the removed heads.
+        while !acc.is_empty() {
+            // acc invariant:
+            // `j` is in the set iff `j <= i` and it has children
+            // among `begin+1..end`, but not among `i+1..begin` (inclusive)
+
+            assert!(i >= -1); // yes, `-1` can also be a head if the repo is empty
+            if acc.remove(&i) {
+                heads_removed.push(Revision(i));
+            }
+            for Revision(parent) in self.parents(Revision(i))? {
+                acc.remove(&parent);
+            }
+            i -= 1;
+        }
+
+        Ok((heads_removed, heads_added))
+    }
+
     /// Return the head revisions of this index
     pub fn head_revs_filtered(
         &self,
--- a/rust/hg-cpython/src/revlog.rs	Thu Dec 21 20:30:03 2023 +0000
+++ b/rust/hg-cpython/src/revlog.rs	Mon Feb 12 20:01:27 2024 +0000
@@ -315,6 +315,15 @@
         Ok(rust_res)
     }
 
+    /// get diff in head revisions
+    def headrevsdiff(&self, *args, **_kw) -> PyResult<PyObject> {
+        let rust_res = self.inner_headrevsdiff(
+          py,
+          &args.get_item(py, 0),
+          &args.get_item(py, 1))?;
+        Ok(rust_res)
+    }
+
     /// get filtered head revisions
     def headrevsfiltered(&self, *args, **_kw) -> PyResult<PyObject> {
         let rust_res = self.inner_headrevsfiltered(py, &args.get_item(py, 0))?;
@@ -827,6 +836,38 @@
             .into_object())
     }
 
+    fn check_revision(
+        index: &hg::index::Index,
+        rev: UncheckedRevision,
+        py: Python,
+    ) -> PyResult<Revision> {
+        index
+            .check_revision(rev)
+            .ok_or_else(|| rev_not_in_index(py, rev))
+    }
+
+    fn inner_headrevsdiff(
+        &self,
+        py: Python,
+        begin: &PyObject,
+        end: &PyObject,
+    ) -> PyResult<PyObject> {
+        let begin = begin.extract::<BaseRevision>(py)?;
+        let end = end.extract::<BaseRevision>(py)?;
+        let index = &mut *self.index(py).borrow_mut();
+        let begin =
+            Self::check_revision(index, UncheckedRevision(begin - 1), py)?;
+        let end = Self::check_revision(index, UncheckedRevision(end - 1), py)?;
+        let (removed, added) = index
+            .head_revs_diff(begin, end)
+            .map_err(|e| graph_error(py, e))?;
+        let removed: Vec<_> =
+            removed.into_iter().map(PyRevision::from).collect();
+        let added: Vec<_> = added.into_iter().map(PyRevision::from).collect();
+        let res = (removed, added).to_py_object(py).into_object();
+        Ok(res)
+    }
+
     fn inner_headrevsfiltered(
         &self,
         py: Python,