Patchwork [4,of,4] rust-cpython: mark unsafe functions as such

login
register
mail settings
Submitter Yuya Nishihara
Date Sept. 1, 2019, 1:40 p.m.
Message ID <7de30788e0ec64cee7bb.1567345234@mimosa>
Download mbox | patch
Permalink /patch/41462/
State Accepted
Headers show

Comments

Yuya Nishihara - Sept. 1, 2019, 1:40 p.m.
# HG changeset patch
# User Yuya Nishihara <yuya@tcha.org>
# Date 1567328791 -32400
#      Sun Sep 01 18:06:31 2019 +0900
# Node ID 7de30788e0ec64cee7bba5b3fd620a188bf81a09
# Parent  a69f68569113eaa65e3a0e2773d27e2f2dea00bd
rust-cpython: mark unsafe functions as such

It wasn't trivial to fix leak_immutable() to be safe since we have to
allow immutable operations (e.g. iter()) on the leaked reference. So
let's mark it unsafe for now. Callers must take care of the returned
object to guarantee the memory safety.

I'll revisit this later. I think $leaked<T: 'static> could have a function
that converts itself into $leaked<U: 'static> with a given FnOnce(&T) -> &U,
where T is $inner_struct, and U is $iterator_type for example.
Raphaël Gomès - Sept. 1, 2019, 6:46 p.m.
Thanks a lot for this series, it looks very good to me.

There was indeed a lot of room for improvement.

On 9/1/19 3:40 PM, Yuya Nishihara wrote:
> # HG changeset patch
> # User Yuya Nishihara <yuya@tcha.org>
> # Date 1567328791 -32400
> #      Sun Sep 01 18:06:31 2019 +0900
> # Node ID 7de30788e0ec64cee7bba5b3fd620a188bf81a09
> # Parent  a69f68569113eaa65e3a0e2773d27e2f2dea00bd
> rust-cpython: mark unsafe functions as such
>
> It wasn't trivial to fix leak_immutable() to be safe since we have to
> allow immutable operations (e.g. iter()) on the leaked reference. So
> let's mark it unsafe for now. Callers must take care of the returned
> object to guarantee the memory safety.
>
> I'll revisit this later. I think $leaked<T: 'static> could have a function
> that converts itself into $leaked<U: 'static> with a given FnOnce(&T) -> &U,
> where T is $inner_struct, and U is $iterator_type for example.
I like your idea for your next series on this issue, I think it's 
possible to reduce the unsafety to a minimum.
>
> diff --git a/rust/hg-cpython/src/dirstate/dirs_multiset.rs b/rust/hg-cpython/src/dirstate/dirs_multiset.rs
> --- a/rust/hg-cpython/src/dirstate/dirs_multiset.rs
> +++ b/rust/hg-cpython/src/dirstate/dirs_multiset.rs
> @@ -86,7 +86,7 @@ py_class!(pub class Dirs |py| {
>               })
>       }
>       def __iter__(&self) -> PyResult<DirsMultisetKeysIterator> {
> -        let (leak_handle, leaked_ref) = self.leak_immutable(py)?;
> +        let (leak_handle, leaked_ref) = unsafe { self.leak_immutable(py)? };
>           DirsMultisetKeysIterator::create_instance(
>               py,
>               RefCell::new(Some(leak_handle)),
> diff --git a/rust/hg-cpython/src/dirstate/dirstate_map.rs b/rust/hg-cpython/src/dirstate/dirstate_map.rs
> --- a/rust/hg-cpython/src/dirstate/dirstate_map.rs
> +++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs
> @@ -319,7 +319,7 @@ py_class!(pub class DirstateMap |py| {
>       }
>   
>       def keys(&self) -> PyResult<DirstateMapKeysIterator> {
> -        let (leak_handle, leaked_ref) = self.leak_immutable(py)?;
> +        let (leak_handle, leaked_ref) = unsafe { self.leak_immutable(py)? };
>           DirstateMapKeysIterator::from_inner(
>               py,
>               Some(leak_handle),
> @@ -328,7 +328,7 @@ py_class!(pub class DirstateMap |py| {
>       }
>   
>       def items(&self) -> PyResult<DirstateMapItemsIterator> {
> -        let (leak_handle, leaked_ref) = self.leak_immutable(py)?;
> +        let (leak_handle, leaked_ref) = unsafe { self.leak_immutable(py)? };
>           DirstateMapItemsIterator::from_inner(
>               py,
>               Some(leak_handle),
> @@ -337,7 +337,7 @@ py_class!(pub class DirstateMap |py| {
>       }
>   
>       def __iter__(&self) -> PyResult<DirstateMapKeysIterator> {
> -        let (leak_handle, leaked_ref) = self.leak_immutable(py)?;
> +        let (leak_handle, leaked_ref) = unsafe { self.leak_immutable(py)? };
>           DirstateMapKeysIterator::from_inner(
>               py,
>               Some(leak_handle),
> @@ -434,7 +434,7 @@ py_class!(pub class DirstateMap |py| {
>       }
>   
>       def copymapiter(&self) -> PyResult<CopyMapKeysIterator> {
> -        let (leak_handle, leaked_ref) = self.leak_immutable(py)?;
> +        let (leak_handle, leaked_ref) = unsafe { self.leak_immutable(py)? };
>           CopyMapKeysIterator::from_inner(
>               py,
>               Some(leak_handle),
> @@ -443,7 +443,7 @@ py_class!(pub class DirstateMap |py| {
>       }
>   
>       def copymapitemsiter(&self) -> PyResult<CopyMapItemsIterator> {
> -        let (leak_handle, leaked_ref) = self.leak_immutable(py)?;
> +        let (leak_handle, leaked_ref) = unsafe { self.leak_immutable(py)? };
>           CopyMapItemsIterator::from_inner(
>               py,
>               Some(leak_handle),
> diff --git a/rust/hg-cpython/src/ref_sharing.rs b/rust/hg-cpython/src/ref_sharing.rs
> --- a/rust/hg-cpython/src/ref_sharing.rs
> +++ b/rust/hg-cpython/src/ref_sharing.rs
> @@ -58,7 +58,12 @@ impl PySharedState {
>       /// Return a reference to the wrapped data with an artificial static
>       /// lifetime.
>       /// We need to be protected by the GIL for thread-safety.
> -    pub fn leak_immutable<T>(
> +    ///
> +    /// # Safety
> +    ///
> +    /// This is highly unsafe since the lifetime of the given data can be
> +    /// extended. Do not call this function directly.
> +    pub unsafe fn leak_immutable<T>(
>           &self,
>           py: Python,
>           data: &PySharedRefCell<T>,
> @@ -72,10 +77,14 @@ impl PySharedState {
>           }
>           let ptr = data.as_ptr();
>           self.leak_count.replace(self.leak_count.get() + 1);
> -        unsafe { Ok(&*ptr) }
> +        Ok(&*ptr)
>       }
>   
> -    pub fn decrease_leak_count(&self, _py: Python, mutable: bool) {
> +    /// # Safety
> +    ///
> +    /// It's unsafe to update the reference count without knowing the
> +    /// reference is deleted. Do not call this function directly.
> +    pub unsafe fn decrease_leak_count(&self, _py: Python, mutable: bool) {
>           self.leak_count
>               .replace(self.leak_count.get().saturating_sub(1));
>           if mutable {
> @@ -123,6 +132,8 @@ pub struct PyRefMut<'a, T> {
>   }
>   
>   impl<'a, T> PyRefMut<'a, T> {
> +    // Must be constructed by PySharedState after checking its leak_count.
> +    // Otherwise, drop() would incorrectly update the state.
>       fn new(
>           _py: Python<'a>,
>           inner: RefMut<'a, T>,
> @@ -152,7 +163,9 @@ impl<'a, T> Drop for PyRefMut<'a, T> {
>       fn drop(&mut self) {
>           let gil = Python::acquire_gil();
>           let py = gil.python();
> -        self.py_shared_state.decrease_leak_count(py, true);
> +        unsafe {
> +            self.py_shared_state.decrease_leak_count(py, true);
> +        }
>       }
>   }
>   
> @@ -223,7 +236,7 @@ macro_rules! py_shared_ref {
>               /// It's up to you to make sure that the management object lives
>               /// longer than the leaked reference. Otherwise, you'll get a
>               /// dangling reference.
> -            fn leak_immutable<'a>(
> +            unsafe fn leak_immutable<'a>(
>                   &'a self,
>                   py: Python<'a>,
>               ) -> PyResult<($leaked, &'static $inner_struct)> {
> @@ -247,7 +260,9 @@ macro_rules! py_shared_ref {
>           }
>   
>           impl $leaked {
> -            fn new(py: Python, inner: &$name) -> Self {
> +            // Marked as unsafe so client code wouldn't construct $leaked
> +            // struct by mistake. Its drop() is unsafe.
> +            unsafe fn new(py: Python, inner: &$name) -> Self {
>                   Self {
>                       inner: inner.clone_ref(py),
>                   }
> @@ -258,9 +273,10 @@ macro_rules! py_shared_ref {
>               fn drop(&mut self) {
>                   let gil = Python::acquire_gil();
>                   let py = gil.python();
> -                self.inner
> -                    .py_shared_state(py)
> -                    .decrease_leak_count(py, false);
> +                let state = self.inner.py_shared_state(py);
> +                unsafe {
> +                    state.decrease_leak_count(py, false);
> +                }
>               }
>           }
>       };
> @@ -346,7 +362,7 @@ macro_rules! py_shared_iterator_impl {
>   ///     data py_shared_state: PySharedState;
>   ///
>   ///     def __iter__(&self) -> PyResult<MyTypeItemsIterator> {
> -///         let (leak_handle, leaked_ref) = self.leak_immutable(py)?;
> +///         let (leak_handle, leaked_ref) = unsafe { self.leak_immutable(py)? };
>   ///         MyTypeItemsIterator::create_instance(
>   ///             py,
>   ///             RefCell::new(Some(leak_handle)),
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
Augie Fackler - Sept. 5, 2019, 6:28 p.m.
On Sun, Sep 01, 2019 at 08:46:07PM +0200, Raphaël Gomès wrote:
> Thanks a lot for this series, it looks very good to me.
>
> There was indeed a lot of room for improvement.

Queued per this review, thanks.
Yuya Nishihara - Sept. 8, 2019, 10:09 a.m.
On Sun, 1 Sep 2019 20:46:07 +0200, Raphaël Gomès wrote:
> > It wasn't trivial to fix leak_immutable() to be safe since we have to
> > allow immutable operations (e.g. iter()) on the leaked reference. So
> > let's mark it unsafe for now. Callers must take care of the returned
> > object to guarantee the memory safety.
> >
> > I'll revisit this later. I think $leaked<T: 'static> could have a function
> > that converts itself into $leaked<U: 'static> with a given FnOnce(&T) -> &U,
> > where T is $inner_struct, and U is $iterator_type for example.
> I like your idea for your next series on this issue, I think it's 
> possible to reduce the unsafety to a minimum.

Confirmed that it's doable with PoC-level code. I'll send a set of patches
maybe next weekend. For reference, the lambda function should be
FnOnce(&'static T) -> U, where U is Iter<'static, ..> for instance.

Patch

diff --git a/rust/hg-cpython/src/dirstate/dirs_multiset.rs b/rust/hg-cpython/src/dirstate/dirs_multiset.rs
--- a/rust/hg-cpython/src/dirstate/dirs_multiset.rs
+++ b/rust/hg-cpython/src/dirstate/dirs_multiset.rs
@@ -86,7 +86,7 @@  py_class!(pub class Dirs |py| {
             })
     }
     def __iter__(&self) -> PyResult<DirsMultisetKeysIterator> {
-        let (leak_handle, leaked_ref) = self.leak_immutable(py)?;
+        let (leak_handle, leaked_ref) = unsafe { self.leak_immutable(py)? };
         DirsMultisetKeysIterator::create_instance(
             py,
             RefCell::new(Some(leak_handle)),
diff --git a/rust/hg-cpython/src/dirstate/dirstate_map.rs b/rust/hg-cpython/src/dirstate/dirstate_map.rs
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs
+++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs
@@ -319,7 +319,7 @@  py_class!(pub class DirstateMap |py| {
     }
 
     def keys(&self) -> PyResult<DirstateMapKeysIterator> {
-        let (leak_handle, leaked_ref) = self.leak_immutable(py)?;
+        let (leak_handle, leaked_ref) = unsafe { self.leak_immutable(py)? };
         DirstateMapKeysIterator::from_inner(
             py,
             Some(leak_handle),
@@ -328,7 +328,7 @@  py_class!(pub class DirstateMap |py| {
     }
 
     def items(&self) -> PyResult<DirstateMapItemsIterator> {
-        let (leak_handle, leaked_ref) = self.leak_immutable(py)?;
+        let (leak_handle, leaked_ref) = unsafe { self.leak_immutable(py)? };
         DirstateMapItemsIterator::from_inner(
             py,
             Some(leak_handle),
@@ -337,7 +337,7 @@  py_class!(pub class DirstateMap |py| {
     }
 
     def __iter__(&self) -> PyResult<DirstateMapKeysIterator> {
-        let (leak_handle, leaked_ref) = self.leak_immutable(py)?;
+        let (leak_handle, leaked_ref) = unsafe { self.leak_immutable(py)? };
         DirstateMapKeysIterator::from_inner(
             py,
             Some(leak_handle),
@@ -434,7 +434,7 @@  py_class!(pub class DirstateMap |py| {
     }
 
     def copymapiter(&self) -> PyResult<CopyMapKeysIterator> {
-        let (leak_handle, leaked_ref) = self.leak_immutable(py)?;
+        let (leak_handle, leaked_ref) = unsafe { self.leak_immutable(py)? };
         CopyMapKeysIterator::from_inner(
             py,
             Some(leak_handle),
@@ -443,7 +443,7 @@  py_class!(pub class DirstateMap |py| {
     }
 
     def copymapitemsiter(&self) -> PyResult<CopyMapItemsIterator> {
-        let (leak_handle, leaked_ref) = self.leak_immutable(py)?;
+        let (leak_handle, leaked_ref) = unsafe { self.leak_immutable(py)? };
         CopyMapItemsIterator::from_inner(
             py,
             Some(leak_handle),
diff --git a/rust/hg-cpython/src/ref_sharing.rs b/rust/hg-cpython/src/ref_sharing.rs
--- a/rust/hg-cpython/src/ref_sharing.rs
+++ b/rust/hg-cpython/src/ref_sharing.rs
@@ -58,7 +58,12 @@  impl PySharedState {
     /// Return a reference to the wrapped data with an artificial static
     /// lifetime.
     /// We need to be protected by the GIL for thread-safety.
-    pub fn leak_immutable<T>(
+    ///
+    /// # Safety
+    ///
+    /// This is highly unsafe since the lifetime of the given data can be
+    /// extended. Do not call this function directly.
+    pub unsafe fn leak_immutable<T>(
         &self,
         py: Python,
         data: &PySharedRefCell<T>,
@@ -72,10 +77,14 @@  impl PySharedState {
         }
         let ptr = data.as_ptr();
         self.leak_count.replace(self.leak_count.get() + 1);
-        unsafe { Ok(&*ptr) }
+        Ok(&*ptr)
     }
 
-    pub fn decrease_leak_count(&self, _py: Python, mutable: bool) {
+    /// # Safety
+    ///
+    /// It's unsafe to update the reference count without knowing the
+    /// reference is deleted. Do not call this function directly.
+    pub unsafe fn decrease_leak_count(&self, _py: Python, mutable: bool) {
         self.leak_count
             .replace(self.leak_count.get().saturating_sub(1));
         if mutable {
@@ -123,6 +132,8 @@  pub struct PyRefMut<'a, T> {
 }
 
 impl<'a, T> PyRefMut<'a, T> {
+    // Must be constructed by PySharedState after checking its leak_count.
+    // Otherwise, drop() would incorrectly update the state.
     fn new(
         _py: Python<'a>,
         inner: RefMut<'a, T>,
@@ -152,7 +163,9 @@  impl<'a, T> Drop for PyRefMut<'a, T> {
     fn drop(&mut self) {
         let gil = Python::acquire_gil();
         let py = gil.python();
-        self.py_shared_state.decrease_leak_count(py, true);
+        unsafe {
+            self.py_shared_state.decrease_leak_count(py, true);
+        }
     }
 }
 
@@ -223,7 +236,7 @@  macro_rules! py_shared_ref {
             /// It's up to you to make sure that the management object lives
             /// longer than the leaked reference. Otherwise, you'll get a
             /// dangling reference.
-            fn leak_immutable<'a>(
+            unsafe fn leak_immutable<'a>(
                 &'a self,
                 py: Python<'a>,
             ) -> PyResult<($leaked, &'static $inner_struct)> {
@@ -247,7 +260,9 @@  macro_rules! py_shared_ref {
         }
 
         impl $leaked {
-            fn new(py: Python, inner: &$name) -> Self {
+            // Marked as unsafe so client code wouldn't construct $leaked
+            // struct by mistake. Its drop() is unsafe.
+            unsafe fn new(py: Python, inner: &$name) -> Self {
                 Self {
                     inner: inner.clone_ref(py),
                 }
@@ -258,9 +273,10 @@  macro_rules! py_shared_ref {
             fn drop(&mut self) {
                 let gil = Python::acquire_gil();
                 let py = gil.python();
-                self.inner
-                    .py_shared_state(py)
-                    .decrease_leak_count(py, false);
+                let state = self.inner.py_shared_state(py);
+                unsafe {
+                    state.decrease_leak_count(py, false);
+                }
             }
         }
     };
@@ -346,7 +362,7 @@  macro_rules! py_shared_iterator_impl {
 ///     data py_shared_state: PySharedState;
 ///
 ///     def __iter__(&self) -> PyResult<MyTypeItemsIterator> {
-///         let (leak_handle, leaked_ref) = self.leak_immutable(py)?;
+///         let (leak_handle, leaked_ref) = unsafe { self.leak_immutable(py)? };
 ///         MyTypeItemsIterator::create_instance(
 ///             py,
 ///             RefCell::new(Some(leak_handle)),