rust/hg-cpython/src/discovery.rs
author Raphaël Gomès <rgomes@octobus.net>
Thu, 23 Nov 2023 03:41:58 +0100
changeset 51255 24d3298189d7
parent 51245 0b81440e2a73
permissions -rw-r--r--
rust-index: document safety invariants being upheld for every `unsafe` block We've added a lot of `unsafe` code that shares Rust structs with Python. While this is unfortunate, it is also unavoidable, so let's at least systematically explain why each call to `unsafe` is sound. If any of the unsafe code ends up being wrong (because everyone screws up at some point), this change at least continues the unspoken rule of always explaining the need for `unsafe`, so we at least get a chance to think.

// discovery.rs
//
// Copyright 2018 Georges Racinet <gracinet@anybox.fr>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.

//! Bindings for the `hg::discovery` module provided by the
//! `hg-core` crate. From Python, this will be seen as `rustext.discovery`
//!
//! # Classes visible from Python:
//! - [`PartialDiscovery`] is the Rust implementation of
//!   `mercurial.setdiscovery.partialdiscovery`.

use crate::PyRevision;
use crate::{
    conversion::rev_pyiter_collect, exceptions::GraphError,
    revlog::PySharedIndex,
};
use cpython::{
    ObjectProtocol, PyClone, PyDict, PyModule, PyObject, PyResult, PyTuple,
    Python, PythonObject, ToPyObject, UnsafePyLeaked,
};
use hg::discovery::PartialDiscovery as CorePartialDiscovery;
use hg::Revision;
use std::collections::HashSet;

use std::cell::RefCell;

use crate::revlog::py_rust_index_to_graph;

py_class!(pub class PartialDiscovery |py| {
    data inner: RefCell<UnsafePyLeaked<CorePartialDiscovery<PySharedIndex>>>;
    data index: RefCell<UnsafePyLeaked<PySharedIndex>>;

    // `_respectsize` is currently only here to replicate the Python API and
    // will be used in future patches inside methods that are yet to be
    // implemented.
    def __new__(
        _cls,
        repo: PyObject,
        targetheads: PyObject,
        respectsize: bool,
        randomize: bool = true
    ) -> PyResult<PartialDiscovery> {
        Self::inner_new(py, repo, targetheads, respectsize, randomize)
    }

    def addcommons(&self, commons: PyObject) -> PyResult<PyObject> {
        self.inner_addcommons(py, commons)
    }

    def addmissings(&self, missings: PyObject) -> PyResult<PyObject> {
        self.inner_addmissings(py, missings)
    }

    def addinfo(&self, sample: PyObject) -> PyResult<PyObject> {
        self.inner_addinfo(py, sample)
    }

    def hasinfo(&self) -> PyResult<bool> {
        let leaked = self.inner(py).borrow();
        // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
        let inner = unsafe { leaked.try_borrow(py)? };
        Ok(inner.has_info())
    }

    def iscomplete(&self) -> PyResult<bool> {
        let leaked = self.inner(py).borrow();
        // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
        let inner = unsafe { leaked.try_borrow(py)? };
        Ok(inner.is_complete())
    }

    def stats(&self) -> PyResult<PyDict> {
        let leaked = self.inner(py).borrow();
        // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
        let inner = unsafe { leaked.try_borrow(py)? };
        let stats = inner.stats();
        let as_dict: PyDict = PyDict::new(py);
        as_dict.set_item(py, "undecided",
                         stats.undecided.map(
                             |l| l.to_py_object(py).into_object())
                             .unwrap_or_else(|| py.None()))?;
        Ok(as_dict)
    }

    def commonheads(&self) -> PyResult<HashSet<PyRevision>> {
        let leaked = self.inner(py).borrow();
        // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
        let inner = unsafe { leaked.try_borrow(py)? };
        let res = inner.common_heads()
                    .map_err(|e| GraphError::pynew(py, e))?;
        Ok(res.into_iter().map(Into::into).collect())
    }

    def takefullsample(&self, headrevs: PyObject,
                       size: usize) -> PyResult<PyObject> {
        self.inner_takefullsample(py, headrevs, size)
    }

    def takequicksample(&self, headrevs: PyObject,
                        size: usize) -> PyResult<PyObject> {
        self.inner_takequicksample(py, headrevs, size)
    }

});

impl PartialDiscovery {
    fn inner_new(
        py: Python,
        repo: PyObject,
        targetheads: PyObject,
        respectsize: bool,
        randomize: bool,
    ) -> PyResult<Self> {
        let index = repo.getattr(py, "changelog")?.getattr(py, "index")?;
        let cloned_index = py_rust_index_to_graph(py, index.clone_ref(py))?;
        let index = py_rust_index_to_graph(py, index)?;
        // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
        let target_heads = {
            let borrowed_idx = unsafe { index.try_borrow(py)? };
            rev_pyiter_collect(py, &targetheads, &*borrowed_idx)?
        };
        // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
        let lazy_disco = unsafe {
            index.map(py, |idx| {
                CorePartialDiscovery::new(
                    idx,
                    target_heads,
                    respectsize,
                    randomize,
                )
            })
        };
        Self::create_instance(
            py,
            RefCell::new(lazy_disco),
            RefCell::new(cloned_index),
        )
    }

    /// Convert a Python iterator of revisions into a vector
    fn pyiter_to_vec(
        &self,
        py: Python,
        iter: &PyObject,
    ) -> PyResult<Vec<Revision>> {
        let leaked = self.index(py).borrow();
        // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
        let index = unsafe { leaked.try_borrow(py)? };
        rev_pyiter_collect(py, iter, &*index)
    }

    fn inner_addinfo(
        &self,
        py: Python,
        sample: PyObject,
    ) -> PyResult<PyObject> {
        let mut missing: Vec<Revision> = Vec::new();
        let mut common: Vec<Revision> = Vec::new();
        for info in sample.iter(py)? {
            // info is a pair (Revision, bool)
            let mut revknown = info?.iter(py)?;
            let rev: PyRevision = revknown.next().unwrap()?.extract(py)?;
            // This is fine since we're just using revisions as integers
            // for the purposes of discovery
            let rev = Revision(rev.0);
            let known: bool = revknown.next().unwrap()?.extract(py)?;
            if known {
                common.push(rev);
            } else {
                missing.push(rev);
            }
        }
        let mut leaked = self.inner(py).borrow_mut();
        // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
        let mut inner = unsafe { leaked.try_borrow_mut(py)? };
        inner
            .add_common_revisions(common)
            .map_err(|e| GraphError::pynew(py, e))?;
        inner
            .add_missing_revisions(missing)
            .map_err(|e| GraphError::pynew(py, e))?;
        Ok(py.None())
    }

    fn inner_addcommons(
        &self,
        py: Python,
        commons: PyObject,
    ) -> PyResult<PyObject> {
        let commons_vec = self.pyiter_to_vec(py, &commons)?;
        let mut leaked = self.inner(py).borrow_mut();
        // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
        let mut inner = unsafe { leaked.try_borrow_mut(py)? };
        inner
            .add_common_revisions(commons_vec)
            .map_err(|e| GraphError::pynew(py, e))?;
        Ok(py.None())
    }

    fn inner_addmissings(
        &self,
        py: Python,
        missings: PyObject,
    ) -> PyResult<PyObject> {
        let missings_vec = self.pyiter_to_vec(py, &missings)?;
        let mut leaked = self.inner(py).borrow_mut();
        // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
        let mut inner = unsafe { leaked.try_borrow_mut(py)? };
        inner
            .add_missing_revisions(missings_vec)
            .map_err(|e| GraphError::pynew(py, e))?;
        Ok(py.None())
    }

    fn inner_takefullsample(
        &self,
        py: Python,
        _headrevs: PyObject,
        size: usize,
    ) -> PyResult<PyObject> {
        let mut leaked = self.inner(py).borrow_mut();
        // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
        let mut inner = unsafe { leaked.try_borrow_mut(py)? };
        let sample = inner
            .take_full_sample(size)
            .map_err(|e| GraphError::pynew(py, e))?;
        let as_vec: Vec<PyObject> = sample
            .iter()
            .map(|rev| PyRevision(rev.0).to_py_object(py).into_object())
            .collect();
        Ok(PyTuple::new(py, as_vec.as_slice()).into_object())
    }

    fn inner_takequicksample(
        &self,
        py: Python,
        headrevs: PyObject,
        size: usize,
    ) -> PyResult<PyObject> {
        let revsvec = self.pyiter_to_vec(py, &headrevs)?;
        let mut leaked = self.inner(py).borrow_mut();
        // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
        let mut inner = unsafe { leaked.try_borrow_mut(py)? };
        let sample = inner
            .take_quick_sample(revsvec, size)
            .map_err(|e| GraphError::pynew(py, e))?;
        let as_vec: Vec<PyObject> = sample
            .iter()
            .map(|rev| PyRevision(rev.0).to_py_object(py).into_object())
            .collect();
        Ok(PyTuple::new(py, as_vec.as_slice()).into_object())
    }
}

/// Create the module, with __package__ given from parent
pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
    let dotted_name = &format!("{}.discovery", package);
    let m = PyModule::new(py, dotted_name)?;
    m.add(py, "__package__", package)?;
    m.add(
        py,
        "__doc__",
        "Discovery of common node sets - Rust implementation",
    )?;
    m.add_class::<PartialDiscovery>(py)?;

    let sys = PyModule::import(py, "sys")?;
    let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
    sys_modules.set_item(py, dotted_name, &m)?;
    // Example C code (see pyexpat.c and import.c) will "give away the
    // reference", but we won't because it will be consumed once the
    // Rust PyObject is dropped.
    Ok(m)
}