Patchwork D5438: rust-cpython: implementing Graph using C parents function

login
register
mail settings
Submitter phabricator
Date Dec. 22, 2018, 4:24 p.m.
Message ID <272a50890fa275bc4936cf5e9bb047f7@localhost.localdomain>
Download mbox | patch
Permalink /patch/37316/
State Not Applicable
Headers show

Comments

phabricator - Dec. 22, 2018, 4:24 p.m.
gracinet updated this revision to Diff 12949.
gracinet edited the summary of this revision.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D5438?vs=12875&id=12949

REVISION DETAIL
  https://phab.mercurial-scm.org/D5438

AFFECTED FILES
  mercurial/cext/revlog.c
  rust/hg-cpython/src/cindex.rs
  rust/hg-cpython/src/lib.rs

CHANGE DETAILS




To: gracinet, #hg-reviewers
Cc: yuja, durin42, kevincox, mercurial-devel
Yuya Nishihara - Dec. 24, 2018, 10:29 a.m.
> +/// # TODO find a solution to make it GIL safe again.
> +///
> +/// This is non trivial, and can wait until we have a clearer picture with
> +/// more Rust Mercurial constructs.
> +///
> +/// One possibility would be to a `GILProtectedIndex` wrapper enclosing
> +/// a `Python<'p>` marker and have it be the one implementing the
> +/// `Graph` trait, but this would mean the `Graph` implementor would become
> +/// likely to change between subsequent method invocations of the `hg-core`
> +/// objects (a serious change of the `hg-core` API):
> +/// either exposing ways to mutate the `Graph`, or making it a non persistent
> +/// parameter in the relevant methods that need one.

Thinking about this a bit further, I'm starting to feel that a "non persistent
parameter" would be an easier choice.

If an `Index` object were implemented in pure Rust, it would hold the entire
index data in memory. As we wouldn't want to memcpy such a large object, there
would be some reference types (e.g. `&Index`, `Rc<Index>`, etc.) involved
somewhere. For instance, `AncestorsIterator<G>` might have to be
`AncestorsIterator<G: 'g>`, and holding a reference would slightly complicate
things in a similar way to holding `Python<'p>`.

`Index` could be backed by e.g. `Rc<RefCell<_>>` to allow any object to own
`<G: Index>` copies, but I don't feel like this is a good design.
phabricator - Dec. 24, 2018, 10:31 a.m.
yuja added a comment.


  > +/// # TODO find a solution to make it GIL safe again.
  >  +///
  >  +/// This is non trivial, and can wait until we have a clearer picture with
  >  +/// more Rust Mercurial constructs.
  >  +///
  >  +/// One possibility would be to a `GILProtectedIndex` wrapper enclosing
  >  +/// a `Python<'p>` marker and have it be the one implementing the
  >  +/// `Graph` trait, but this would mean the `Graph` implementor would become
  >  +/// likely to change between subsequent method invocations of the `hg-core`
  >  +/// objects (a serious change of the `hg-core` API):
  >  +/// either exposing ways to mutate the `Graph`, or making it a non persistent
  >  +/// parameter in the relevant methods that need one.
  
  Thinking about this a bit further, I'm starting to feel that a "non persistent
  parameter" would be an easier choice.
  
  If an `Index` object were implemented in pure Rust, it would hold the entire
  index data in memory. As we wouldn't want to memcpy such a large object, there
  would be some reference types (e.g. `&Index`, `Rc<Index>`, etc.) involved
  somewhere. For instance, `AncestorsIterator<G>` might have to be
  `AncestorsIterator<G: 'g>`, and holding a reference would slightly complicate
  things in a similar way to holding `Python<'p>`.
  
  `Index` could be backed by e.g. `Rc<RefCell<_>>` to allow any object to own
  `<G: Index>` copies, but I don't feel like this is a good design.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D5438

To: gracinet, #hg-reviewers
Cc: yuja, durin42, kevincox, mercurial-devel

Patch

diff --git a/rust/hg-cpython/src/lib.rs b/rust/hg-cpython/src/lib.rs
--- a/rust/hg-cpython/src/lib.rs
+++ b/rust/hg-cpython/src/lib.rs
@@ -21,8 +21,10 @@ 
 #[macro_use]
 extern crate cpython;
 extern crate hg;
+extern crate libc;
 
 mod ancestors;
+mod cindex;
 mod exceptions;
 
 py_module_initializer!(rustext, initrustext, PyInit_rustext, |py, m| {
diff --git a/rust/hg-cpython/src/cindex.rs b/rust/hg-cpython/src/cindex.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-cpython/src/cindex.rs
@@ -0,0 +1,121 @@ 
+// cindex.rs
+//
+// Copyright 2018 Georges Racinet <gracinet@anybox.fr>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+//! Bindings to use the Index defined by the parsers C extension
+//!
+//! Ideally, we should use an Index entirely implemented in Rust,
+//! but it will take some time to get there.
+#[cfg(feature = "python27")]
+extern crate python27_sys as python_sys;
+#[cfg(feature = "python3")]
+extern crate python3_sys as python_sys;
+
+use self::python_sys::PyCapsule_Import;
+use cpython::{PyErr, PyObject, PyResult, Python};
+use hg::{Graph, GraphError, Revision};
+use libc::c_int;
+use std::ffi::CStr;
+use std::mem::transmute;
+
+type IndexParentsFn = unsafe extern "C" fn(
+    index: *mut python_sys::PyObject,
+    rev: c_int,
+    ps: *mut [c_int; 2],
+) -> c_int;
+
+/// A `Graph` backed by objects and functions from revlog.c
+///
+/// This implementation of the `Graph` trait relies on (pointers to)
+/// - the C index object (`index` member)
+/// - the `index_get_parents()` function (`parents` member)
+///
+/// # Safety
+///
+/// The C index itself is mutable, and this Rust exposition is **not
+/// protected by the GIL**, meaning that this construct isn't safe with respect
+/// to Python threads.
+///
+/// All callers of this `Index` must acquire the GIL and must not release it
+/// while working.
+///
+/// # TODO find a solution to make it GIL safe again.
+///
+/// This is non trivial, and can wait until we have a clearer picture with
+/// more Rust Mercurial constructs.
+///
+/// One possibility would be to use a `GILProtectedIndex` wrapper enclosing
+/// a `Python<'p>` marker and have it be the one implementing the
+/// `Graph` trait, but this would mean the `Graph` implementor would become
+/// likely to change between subsequent method invocations of the `hg-core`
+/// objects (a serious change of the `hg-core` API):
+/// either exposing ways to mutate the `Graph`, or making it a non persistent
+/// parameter in the relevant methods that need one.
+///
+/// Another possibility would be to introduce an abstract lock handle into
+/// the core API, that would be tied to `GILGuard` / `Python<'p>`
+/// in the case of the `cpython` crate bindings yet could leave room for other
+/// mechanisms in other contexts.
+
+pub struct Index {
+    index: PyObject,
+    parents: IndexParentsFn,
+}
+
+impl Index {
+    pub fn new(py: Python, index: PyObject) -> PyResult<Self> {
+        Ok(Index {
+            index: index,
+            parents: decapsule_parents_fn(py)?,
+        })
+    }
+}
+
+impl Graph for Index {
+    /// wrap a call to the C extern parents function
+    fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
+        let mut res: [c_int; 2] = [0; 2];
+        let code = unsafe {
+            (self.parents)(
+                self.index.as_ptr(),
+                rev as c_int,
+                &mut res as *mut [c_int; 2],
+            )
+        };
+        match code {
+            0 => Ok(res),
+            _ => Err(GraphError::ParentOutOfRange(rev)),
+        }
+    }
+}
+
+/// Return the `index_get_parents` function of the parsers C Extension module.
+///
+/// A pointer to the function is stored in the `parsers` module as a
+/// standard [Python capsule](https://docs.python.org/2/c-api/capsule.html).
+///
+/// This function retrieves the capsule and casts the function pointer.
+///
+/// Casting function pointers is one of the rare legitimate use cases
+/// of `mem::transmute()` (see the documentation of
+/// `mem::transmute()` at
+/// https://doc.rust-lang.org/std/mem/fn.transmute.html).
+/// It is inappropriate for architectures where
+/// function and data pointer sizes differ (so-called "Harvard
+/// architectures"), but these are nowadays mostly DSPs
+/// and microcontrollers, hence out of our scope.
+fn decapsule_parents_fn(py: Python) -> PyResult<IndexParentsFn> {
+    unsafe {
+        let caps_name = CStr::from_bytes_with_nul_unchecked(
+            b"mercurial.cext.parsers.index_get_parents_CAPI\0",
+        );
+        let from_caps = PyCapsule_Import(caps_name.as_ptr(), 0);
+        if from_caps.is_null() {
+            return Err(PyErr::fetch(py));
+        }
+        Ok(transmute(from_caps))
+    }
+}
diff --git a/mercurial/cext/revlog.c b/mercurial/cext/revlog.c
--- a/mercurial/cext/revlog.c
+++ b/mercurial/cext/revlog.c
@@ -2866,6 +2866,7 @@ 
 
 void revlog_module_init(PyObject *mod)
 {
+	PyObject *caps = NULL;
 	HgRevlogIndex_Type.tp_new = PyType_GenericNew;
 	if (PyType_Ready(&HgRevlogIndex_Type) < 0)
 		return;
@@ -2885,6 +2886,12 @@ 
 	if (nullentry)
 		PyObject_GC_UnTrack(nullentry);
 
+	caps = PyCapsule_New(
+	    HgRevlogIndex_GetParents,
+	    "mercurial.cext.parsers.index_get_parents_CAPI", NULL);
+	if (caps != NULL)
+		PyModule_AddObject(mod, "index_get_parents_CAPI", caps);
+
 #ifdef WITH_RUST
 	rustlazyancestorsType.tp_new = PyType_GenericNew;
 	if (PyType_Ready(&rustlazyancestorsType) < 0)