rust-dirstate: add "dirs" rust-cpython binding
author Raphaël Gomès <rgomes@octobus.net>
Thu, 16 May 2019 18:03:42 +0200
changeset 42537 ce94f9622acd
parent 42536 2dcee6497b0b
child 42538 2db96bf84a8f
rust-dirstate: add "dirs" rust-cpython binding There is an obvious performance and memory issue with these bindings on larger repos, as they copy and allocate everything at once, for the full round-trip. Like in the previous patch series, this is only temporary and will only get better once we don't have large data structures going to and from Python. Differential Revision: https://phab.mercurial-scm.org/D6394
rust/hg-cpython/src/dirstate.rs
--- a/rust/hg-cpython/src/dirstate.rs	Thu May 16 18:03:06 2019 +0200
+++ b/rust/hg-cpython/src/dirstate.rs	Thu May 16 18:03:42 2019 +0200
@@ -11,21 +11,25 @@
 //! From Python, this will be seen as `mercurial.rustext.dirstate`
 
 use cpython::{
-    exc, PyBytes, PyDict, PyErr, PyInt, PyModule, PyObject, PyResult,
-    PySequence, PythonObject, PyTuple, Python, ToPyObject,
+    exc, ObjectProtocol, PyBytes, PyDict, PyErr, PyInt, PyModule, PyObject,
+    PyResult, PySequence, PyTuple, Python, PythonObject, ToPyObject,
 };
 use hg::{
-    pack_dirstate, parse_dirstate, CopyVecEntry, DirstateEntry,
-    DirstatePackError, DirstateParents, DirstateParseError, DirstateVec,
+    pack_dirstate, parse_dirstate, CopyVecEntry, DirsIterable, DirsMultiset,
+    DirstateEntry, DirstateMapError, DirstatePackError, DirstateParents,
+    DirstateParseError, DirstateVec,
 };
 use std::collections::HashMap;
 use std::ffi::CStr;
+
 #[cfg(feature = "python27")]
 extern crate python27_sys as python_sys;
 #[cfg(feature = "python3")]
 extern crate python3_sys as python_sys;
+
 use self::python_sys::PyCapsule_Import;
 use libc::{c_char, c_int};
+use std::cell::RefCell;
 use std::mem::transmute;
 
 /// C code uses a custom `dirstate_tuple` type, checks in multiple instances
@@ -102,20 +106,11 @@
     }
 }
 
-fn pack_dirstate_wrapper(
+fn extract_dirstate_vec(
     py: Python,
-    dmap: PyDict,
-    copymap: PyDict,
-    pl: PyTuple,
-    now: PyInt,
-) -> PyResult<PyBytes> {
-    let p1 = pl.get_item(py, 0).extract::<PyBytes>(py)?;
-    let p1: &[u8] = p1.data(py);
-    let p2 = pl.get_item(py, 1).extract::<PyBytes>(py)?;
-    let p2: &[u8] = p2.data(py);
-
-    let dirstate_vec: Result<DirstateVec, PyErr> = dmap
-        .items(py)
+    dmap: &PyDict,
+) -> Result<DirstateVec, PyErr> {
+    dmap.items(py)
         .iter()
         .map(|(filename, stats)| {
             let stats = stats.extract::<PySequence>(py)?;
@@ -136,7 +131,22 @@
                 },
             ))
         })
-        .collect();
+        .collect()
+}
+
+fn pack_dirstate_wrapper(
+    py: Python,
+    dmap: PyDict,
+    copymap: PyDict,
+    pl: PyTuple,
+    now: PyInt,
+) -> PyResult<PyBytes> {
+    let p1 = pl.get_item(py, 0).extract::<PyBytes>(py)?;
+    let p1: &[u8] = p1.data(py);
+    let p2 = pl.get_item(py, 1).extract::<PyBytes>(py)?;
+    let p2: &[u8] = p2.data(py);
+
+    let dirstate_vec = extract_dirstate_vec(py, &dmap)?;
 
     let copies: Result<HashMap<Vec<u8>, Vec<u8>>, PyErr> = copymap
         .items(py)
@@ -150,7 +160,7 @@
         .collect();
 
     match pack_dirstate(
-        &dirstate_vec?,
+        &dirstate_vec,
         &copies?,
         DirstateParents { p1, p2 },
         now.as_object().extract::<i32>(py)?,
@@ -170,7 +180,10 @@
                     py,
                     PyBytes::new(py, &filename[..]),
                     decapsule_make_dirstate_tuple(py)?(
-                        state as c_char, mode, size, mtime,
+                        state as c_char,
+                        mode,
+                        size,
+                        mtime,
                     ),
                 )?;
             }
@@ -191,10 +204,103 @@
     }
 }
 
+py_class!(pub class Dirs |py| {
+    data dirs_map: RefCell<DirsMultiset>;
+
+    // `map` is either a `dict` or a flat iterable (usually a `set`, sometimes
+    // a `list`)
+    def __new__(
+        _cls,
+        map: PyObject,
+        skip: Option<PyObject> = None
+    ) -> PyResult<Self> {
+        let mut skip_state: Option<i8> = None;
+        if let Some(skip) = skip {
+            skip_state = Some(skip.extract::<PyBytes>(py)?.data(py)[0] as i8);
+        }
+        let dirs_map;
+
+        if let Ok(map) = map.cast_as::<PyDict>(py) {
+            let dirstate_vec = extract_dirstate_vec(py, &map)?;
+            dirs_map = DirsMultiset::new(
+                DirsIterable::Dirstate(dirstate_vec),
+                skip_state,
+            )
+        } else {
+            let map: Result<Vec<Vec<u8>>, PyErr> = map
+                .iter(py)?
+                .map(|o| Ok(o?.extract::<PyBytes>(py)?.data(py).to_owned()))
+                .collect();
+            dirs_map = DirsMultiset::new(
+                DirsIterable::Manifest(map?),
+                skip_state,
+            )
+        }
+
+        Self::create_instance(py, RefCell::new(dirs_map))
+    }
+
+    def addpath(&self, path: PyObject) -> PyResult<PyObject> {
+        self.dirs_map(py).borrow_mut().add_path(
+            path.extract::<PyBytes>(py)?.data(py),
+        );
+        Ok(py.None())
+    }
+
+    def delpath(&self, path: PyObject) -> PyResult<PyObject> {
+        self.dirs_map(py).borrow_mut().delete_path(
+            path.extract::<PyBytes>(py)?.data(py),
+        )
+            .and(Ok(py.None()))
+            .or_else(|e| {
+                match e {
+                    DirstateMapError::PathNotFound(_p) => {
+                        Err(PyErr::new::<exc::ValueError, _>(
+                            py,
+                            "expected a value, found none".to_string(),
+                        ))
+                    }
+                    DirstateMapError::EmptyPath => {
+                        Ok(py.None())
+                    }
+                }
+            })
+    }
+
+    // This is really inefficient on top of being ugly, but it's an easy way
+    // to get it working so that work can continue on the rest of the module,
+    // hopefully bypassing Python entirely pretty soon.
+    def __iter__(&self) -> PyResult<PyObject> {
+        let dict = PyDict::new(py);
+
+        for (key, value) in self.dirs_map(py).borrow().iter() {
+            dict.set_item(
+                py,
+                PyBytes::new(py, &key[..]),
+                value.to_py_object(py),
+            )?;
+        }
+
+        let locals = PyDict::new(py);
+        locals.set_item(py, "obj", dict)?;
+
+        py.eval("iter(obj)", None, Some(&locals))
+    }
+
+    def __contains__(&self, item: PyObject) -> PyResult<bool> {
+        Ok(self
+            .dirs_map(py)
+            .borrow()
+            .get(&item.extract::<PyBytes>(py)?.data(py).to_owned())
+            .is_some())
+    }
+});
+
 /// Create the module, with `__package__` given from parent
 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
     let dotted_name = &format!("{}.dirstate", package);
     let m = PyModule::new(py, dotted_name)?;
+
     m.add(py, "__package__", package)?;
     m.add(py, "__doc__", "Dirstate - Rust implementation")?;
     m.add(
@@ -219,6 +325,8 @@
         ),
     )?;
 
+    m.add_class::<Dirs>(py)?;
+
     let sys = PyModule::import(py, "sys")?;
     let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
     sys_modules.set_item(py, dotted_name, &m)?;