Patchwork D6394: rust-dirstate: add "dirs" rust-cpython binding

login
register
mail settings
Submitter phabricator
Date May 17, 2019, 10:10 a.m.
Message ID <differential-rev-PHID-DREV-ivgamq4lyebxwfkjjeoj-req@phab.mercurial-scm.org>
Download mbox | patch
Permalink /patch/40108/
State Superseded
Headers show

Comments

phabricator - May 17, 2019, 10:10 a.m.
Alphare created this revision.
Herald added subscribers: mercurial-devel, kevincox, durin42.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  There is an obvious performance and memory issue with those bindings on larger
  repos as it copies and allocates everything at once, round-trip. Like in the
  previous patch series, this is only temporary and will only get better once
  we don't have large data structures going to and from Python.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D6394

AFFECTED FILES
  rust/hg-cpython/src/dirstate.rs

CHANGE DETAILS




To: Alphare, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel
phabricator - June 10, 2019, 1:06 p.m.
kevincox accepted this revision.
kevincox added inline comments.

INLINE COMMENTS

> dirstate.rs:221
> +        }
> +        let dirs_map;
> +

let dirs_map = if ...

Then just let the if statement evaluate to this value.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D6394

To: Alphare, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel

Patch

diff --git a/rust/hg-cpython/src/dirstate.rs b/rust/hg-cpython/src/dirstate.rs
--- a/rust/hg-cpython/src/dirstate.rs
+++ b/rust/hg-cpython/src/dirstate.rs
@@ -11,21 +11,25 @@ 
 //! From Python, this will be seen as `mercurial.rustext.dirstate`
 
 use cpython::{
-    exc, PyBytes, PyDict, PyErr, PyInt, PyModule, PyObject, PyResult,
-    PySequence, PyTuple, Python, ToPyObject,
+    exc, ObjectProtocol, PyBytes, PyDict, PyErr, PyInt, PyModule, PyObject,
+    PyResult, PySequence, PyTuple, Python, ToPyObject,
 };
 use hg::{
-    pack_dirstate, parse_dirstate, CopyVecEntry, DirstateEntry,
-    DirstatePackError, DirstateParents, DirstateParseError, DirstateVec,
+    pack_dirstate, parse_dirstate, CopyVecEntry, DirsIterable, DirsMultiset,
+    DirstateEntry, DirstateMapError, DirstatePackError, DirstateParents,
+    DirstateParseError, DirstateVec,
 };
 use std::collections::HashMap;
 use std::ffi::CStr;
+
 #[cfg(feature = "python27")]
 extern crate python27_sys as python_sys;
 #[cfg(feature = "python3")]
 extern crate python3_sys as python_sys;
+
 use self::python_sys::PyCapsule_Import;
 use libc::{c_char, c_int};
+use std::cell::RefCell;
 use std::mem::transmute;
 
 /// C code uses a custom `dirstate_tuple` type, checks in multiple instances
@@ -102,20 +106,11 @@ 
     }
 }
 
-fn pack_dirstate_wrapper(
+fn extract_dirstate_vec(
     py: Python,
-    dmap: PyDict,
-    copymap: PyDict,
-    pl: PyTuple,
-    now: PyInt,
-) -> PyResult<PyBytes> {
-    let p1 = pl.get_item(py, 0).extract::<PyBytes>(py)?;
-    let p1: &[u8] = p1.data(py);
-    let p2 = pl.get_item(py, 1).extract::<PyBytes>(py)?;
-    let p2: &[u8] = p2.data(py);
-
-    let dirstate_vec: Result<DirstateVec, PyErr> = dmap
-        .items(py)
+    dmap: &PyDict,
+) -> Result<DirstateVec, PyErr> {
+    dmap.items(py)
         .iter()
         .map(|(filename, stats)| {
             let stats = stats.extract::<PySequence>(py)?;
@@ -136,7 +131,22 @@ 
                 },
             ))
         })
-        .collect();
+        .collect()
+}
+
+fn pack_dirstate_wrapper(
+    py: Python,
+    dmap: PyDict,
+    copymap: PyDict,
+    pl: PyTuple,
+    now: PyInt,
+) -> PyResult<PyBytes> {
+    let p1 = pl.get_item(py, 0).extract::<PyBytes>(py)?;
+    let p1: &[u8] = p1.data(py);
+    let p2 = pl.get_item(py, 1).extract::<PyBytes>(py)?;
+    let p2: &[u8] = p2.data(py);
+
+    let dirstate_vec = extract_dirstate_vec(py, &dmap)?;
 
     let copies: Result<HashMap<Vec<u8>, Vec<u8>>, PyErr> = copymap
         .items(py)
@@ -150,7 +160,7 @@ 
         .collect();
 
     match pack_dirstate(
-        &dirstate_vec?,
+        &dirstate_vec,
         &copies?,
         DirstateParents { p1, p2 },
         now.value(py) as i32,
@@ -191,10 +201,103 @@ 
     }
 }
 
+py_class!(pub class Dirs |py| {
+    data dirs_map: RefCell<DirsMultiset>;
+
+    // `map` is either a `dict` or a flat iterator (usually a `set`, sometimes
+    // a `list`)
+    def __new__(
+        _cls,
+        map: PyObject,
+        skip: Option<PyObject> = None
+    ) -> PyResult<Self> {
+        let mut skip_state: Option<i8> = None;
+        if let Some(skip) = skip {
+            skip_state = Some(skip.extract::<PyBytes>(py)?.data(py)[0] as i8);
+        }
+        let dirs_map;
+
+        if let Ok(map) = map.cast_as::<PyDict>(py) {
+            let dirstate_vec = extract_dirstate_vec(py, &map)?;
+            dirs_map = DirsMultiset::new(
+                DirsIterable::Dirstate(dirstate_vec),
+                skip_state,
+            )
+        } else {
+            let map: Result<Vec<Vec<u8>>, PyErr> = map
+                .iter(py)?
+                .map(|o| Ok(o?.extract::<PyBytes>(py)?.data(py).to_owned()))
+                .collect();
+            dirs_map = DirsMultiset::new(
+                DirsIterable::Manifest(map?),
+                skip_state,
+            )
+        }
+
+        Self::create_instance(py, RefCell::new(dirs_map))
+    }
+
+    def addpath(&self, path: PyObject) -> PyResult<PyObject> {
+        self.dirs_map(py).borrow_mut().add_path(
+            path.extract::<PyBytes>(py)?.data(py).to_owned(),
+        );
+        Ok(py.None())
+    }
+
+    def delpath(&self, path: PyObject) -> PyResult<PyObject> {
+        self.dirs_map(py).borrow_mut().delete_path(
+            path.extract::<PyBytes>(py)?.data(py).to_owned(),
+        )
+            .and(Ok(py.None()))
+            .or_else(|e| {
+                match e {
+                    DirstateMapError::PathNotFound(_p) => {
+                        Err(PyErr::new::<exc::ValueError, _>(
+                            py,
+                            "expected a value, found none".to_string(),
+                        ))
+                    }
+                    DirstateMapError::EmptyPath => {
+                        Ok(py.None())
+                    }
+                }
+            })
+    }
+
+    // This is really inefficient on top of being ugly, but it's an easy way
+    // of having it work to continue working on the rest of the module
+    // hopefully bypassing Python entirely pretty soon.
+    def __iter__(&self) -> PyResult<PyObject> {
+        let dict = PyDict::new(py);
+
+        for (key, value) in self.dirs_map(py).borrow().iter() {
+            dict.set_item(
+                py,
+                PyBytes::new(py, &key[..]),
+                value.to_py_object(py),
+            )?;
+        }
+
+        let locals = PyDict::new(py);
+        locals.set_item(py, "obj", dict)?;
+
+        py.eval("iter(obj)", None, Some(&locals))
+    }
+
+    def __contains__(&self, item: PyObject) -> PyResult<bool> {
+        Ok(self
+            .dirs_map(py)
+            .borrow()
+            .get(&item.extract::<PyBytes>(py)?.data(py).to_owned())
+            .is_some())
+    }
+});
+
 /// Create the module, with `__package__` given from parent
 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
     let dotted_name = &format!("{}.dirstate", package);
     let m = PyModule::new(py, dotted_name)?;
+
     m.add(py, "__package__", package)?;
     m.add(py, "__doc__", "Dirstate - Rust implementation")?;
     m.add(
@@ -219,6 +322,8 @@ 
         ),
     )?;
 
+    m.add_class::<Dirs>(py)?;
+
     let sys = PyModule::import(py, "sys")?;
     let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
     sys_modules.set_item(py, dotted_name, &m)?;