Submitter | Yuya Nishihara |
---|---|
Date | Sept. 1, 2019, 1:40 p.m. |
Message ID | <7de30788e0ec64cee7bb.1567345234@mimosa> |
Download | mbox | patch |
Permalink | /patch/41462/ |
State | Accepted |
Headers | show |
Comments
Thanks a lot for this series, it looks very good to me. There was indeed a lot of room for improvement. On 9/1/19 3:40 PM, Yuya Nishihara wrote: > # HG changeset patch > # User Yuya Nishihara <yuya@tcha.org> > # Date 1567328791 -32400 > # Sun Sep 01 18:06:31 2019 +0900 > # Node ID 7de30788e0ec64cee7bba5b3fd620a188bf81a09 > # Parent a69f68569113eaa65e3a0e2773d27e2f2dea00bd > rust-cpython: mark unsafe functions as such > > It wasn't trivial to fix leak_immutable() to be safe since we have to > allow immutable operations (e.g. iter()) on the leaked reference. So > let's mark it unsafe for now. Callers must take care of the returned > object to guarantee the memory safety. > > I'll revisit this later. I think $leaked<T: 'static> could have a function > that converts itself into $leaked<U: 'static> with a given FnOnce(&T) -> &U, > where T is $inner_struct, and U is $iterator_type for example. I like your idea for your next series on this issue, I think it's possible to reduce the unsafety to a minimum. > > diff --git a/rust/hg-cpython/src/dirstate/dirs_multiset.rs b/rust/hg-cpython/src/dirstate/dirs_multiset.rs > --- a/rust/hg-cpython/src/dirstate/dirs_multiset.rs > +++ b/rust/hg-cpython/src/dirstate/dirs_multiset.rs > @@ -86,7 +86,7 @@ py_class!(pub class Dirs |py| { > }) > } > def __iter__(&self) -> PyResult<DirsMultisetKeysIterator> { > - let (leak_handle, leaked_ref) = self.leak_immutable(py)?; > + let (leak_handle, leaked_ref) = unsafe { self.leak_immutable(py)? }; > DirsMultisetKeysIterator::create_instance( > py, > RefCell::new(Some(leak_handle)), > diff --git a/rust/hg-cpython/src/dirstate/dirstate_map.rs b/rust/hg-cpython/src/dirstate/dirstate_map.rs > --- a/rust/hg-cpython/src/dirstate/dirstate_map.rs > +++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs > @@ -319,7 +319,7 @@ py_class!(pub class DirstateMap |py| { > } > > def keys(&self) -> PyResult<DirstateMapKeysIterator> { > - let (leak_handle, leaked_ref) = self.leak_immutable(py)?; > + let (leak_handle, leaked_ref) = unsafe { self.leak_immutable(py)? }; > DirstateMapKeysIterator::from_inner( > py, > Some(leak_handle), > @@ -328,7 +328,7 @@ py_class!(pub class DirstateMap |py| { > } > > def items(&self) -> PyResult<DirstateMapItemsIterator> { > - let (leak_handle, leaked_ref) = self.leak_immutable(py)?; > + let (leak_handle, leaked_ref) = unsafe { self.leak_immutable(py)? }; > DirstateMapItemsIterator::from_inner( > py, > Some(leak_handle), > @@ -337,7 +337,7 @@ py_class!(pub class DirstateMap |py| { > } > > def __iter__(&self) -> PyResult<DirstateMapKeysIterator> { > - let (leak_handle, leaked_ref) = self.leak_immutable(py)?; > + let (leak_handle, leaked_ref) = unsafe { self.leak_immutable(py)? }; > DirstateMapKeysIterator::from_inner( > py, > Some(leak_handle), > @@ -434,7 +434,7 @@ py_class!(pub class DirstateMap |py| { > } > > def copymapiter(&self) -> PyResult<CopyMapKeysIterator> { > - let (leak_handle, leaked_ref) = self.leak_immutable(py)?; > + let (leak_handle, leaked_ref) = unsafe { self.leak_immutable(py)? }; > CopyMapKeysIterator::from_inner( > py, > Some(leak_handle), > @@ -443,7 +443,7 @@ py_class!(pub class DirstateMap |py| { > } > > def copymapitemsiter(&self) -> PyResult<CopyMapItemsIterator> { > - let (leak_handle, leaked_ref) = self.leak_immutable(py)?; > + let (leak_handle, leaked_ref) = unsafe { self.leak_immutable(py)? }; > CopyMapItemsIterator::from_inner( > py, > Some(leak_handle), > diff --git a/rust/hg-cpython/src/ref_sharing.rs b/rust/hg-cpython/src/ref_sharing.rs > --- a/rust/hg-cpython/src/ref_sharing.rs > +++ b/rust/hg-cpython/src/ref_sharing.rs > @@ -58,7 +58,12 @@ impl PySharedState { > /// Return a reference to the wrapped data with an artificial static > /// lifetime. > /// We need to be protected by the GIL for thread-safety. > - pub fn leak_immutable<T>( > + /// > + /// # Safety > + /// > + /// This is highly unsafe since the lifetime of the given data can be > + /// extended. Do not call this function directly. > + pub unsafe fn leak_immutable<T>( > &self, > py: Python, > data: &PySharedRefCell<T>, > @@ -72,10 +77,14 @@ impl PySharedState { > } > let ptr = data.as_ptr(); > self.leak_count.replace(self.leak_count.get() + 1); > - unsafe { Ok(&*ptr) } > + Ok(&*ptr) > } > > - pub fn decrease_leak_count(&self, _py: Python, mutable: bool) { > + /// # Safety > + /// > + /// It's unsafe to update the reference count without knowing the > + /// reference is deleted. Do not call this function directly. > + pub unsafe fn decrease_leak_count(&self, _py: Python, mutable: bool) { > self.leak_count > .replace(self.leak_count.get().saturating_sub(1)); > if mutable { > @@ -123,6 +132,8 @@ pub struct PyRefMut<'a, T> { > } > > impl<'a, T> PyRefMut<'a, T> { > + // Must be constructed by PySharedState after checking its leak_count. > + // Otherwise, drop() would incorrectly update the state. > fn new( > _py: Python<'a>, > inner: RefMut<'a, T>, > @@ -152,7 +163,9 @@ impl<'a, T> Drop for PyRefMut<'a, T> { > fn drop(&mut self) { > let gil = Python::acquire_gil(); > let py = gil.python(); > - self.py_shared_state.decrease_leak_count(py, true); > + unsafe { > + self.py_shared_state.decrease_leak_count(py, true); > + } > } > } > > @@ -223,7 +236,7 @@ macro_rules! py_shared_ref { > /// It's up to you to make sure that the management object lives > /// longer than the leaked reference. Otherwise, you'll get a > /// dangling reference. > - fn leak_immutable<'a>( > + unsafe fn leak_immutable<'a>( > &'a self, > py: Python<'a>, > ) -> PyResult<($leaked, &'static $inner_struct)> { > @@ -247,7 +260,9 @@ macro_rules! py_shared_ref { > } > > impl $leaked { > - fn new(py: Python, inner: &$name) -> Self { > + // Marked as unsafe so client code wouldn't construct $leaked > + // struct by mistake. Its drop() is unsafe. > + unsafe fn new(py: Python, inner: &$name) -> Self { > Self { > inner: inner.clone_ref(py), > } > @@ -258,9 +273,10 @@ macro_rules! py_shared_ref { > fn drop(&mut self) { > let gil = Python::acquire_gil(); > let py = gil.python(); > - self.inner > - .py_shared_state(py) > - .decrease_leak_count(py, false); > + let state = self.inner.py_shared_state(py); > + unsafe { > + state.decrease_leak_count(py, false); > + } > } > } > }; > @@ -346,7 +362,7 @@ macro_rules! py_shared_iterator_impl { > /// data py_shared_state: PySharedState; > /// > /// def __iter__(&self) -> PyResult<MyTypeItemsIterator> { > -/// let (leak_handle, leaked_ref) = self.leak_immutable(py)?; > +/// let (leak_handle, leaked_ref) = unsafe { self.leak_immutable(py)? }; > /// MyTypeItemsIterator::create_instance( > /// py, > /// RefCell::new(Some(leak_handle)), > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@mercurial-scm.org > https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel
On Sun, Sep 01, 2019 at 08:46:07PM +0200, Raphaël Gomès wrote: > Thanks a lot for this series, it looks very good to me. > > There was indeed a lot of room for improvement. Queued per this review, thanks.
On Sun, 1 Sep 2019 20:46:07 +0200, Raphaël Gomès wrote: > > It wasn't trivial to fix leak_immutable() to be safe since we have to > > allow immutable operations (e.g. iter()) on the leaked reference. So > > let's mark it unsafe for now. Callers must take care of the returned > > object to guarantee the memory safety. > > > > I'll revisit this later. I think $leaked<T: 'static> could have a function > > that converts itself into $leaked<U: 'static> with a given FnOnce(&T) -> &U, > > where T is $inner_struct, and U is $iterator_type for example. > I like your idea for your next series on this issue, I think it's > possible to reduce the unsafety to a minimum. Confirmed that it's doable with PoC-level code. I'll send a set of patches maybe next weekend. For reference, the lambda function should be FnOnce(&'static T) -> U, where U is Iter<'static, ..> for instance.
Patch
diff --git a/rust/hg-cpython/src/dirstate/dirs_multiset.rs b/rust/hg-cpython/src/dirstate/dirs_multiset.rs --- a/rust/hg-cpython/src/dirstate/dirs_multiset.rs +++ b/rust/hg-cpython/src/dirstate/dirs_multiset.rs @@ -86,7 +86,7 @@ py_class!(pub class Dirs |py| { }) } def __iter__(&self) -> PyResult<DirsMultisetKeysIterator> { - let (leak_handle, leaked_ref) = self.leak_immutable(py)?; + let (leak_handle, leaked_ref) = unsafe { self.leak_immutable(py)? }; DirsMultisetKeysIterator::create_instance( py, RefCell::new(Some(leak_handle)), diff --git a/rust/hg-cpython/src/dirstate/dirstate_map.rs b/rust/hg-cpython/src/dirstate/dirstate_map.rs --- a/rust/hg-cpython/src/dirstate/dirstate_map.rs +++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs @@ -319,7 +319,7 @@ py_class!(pub class DirstateMap |py| { } def keys(&self) -> PyResult<DirstateMapKeysIterator> { - let (leak_handle, leaked_ref) = self.leak_immutable(py)?; + let (leak_handle, leaked_ref) = unsafe { self.leak_immutable(py)? }; DirstateMapKeysIterator::from_inner( py, Some(leak_handle), @@ -328,7 +328,7 @@ py_class!(pub class DirstateMap |py| { } def items(&self) -> PyResult<DirstateMapItemsIterator> { - let (leak_handle, leaked_ref) = self.leak_immutable(py)?; + let (leak_handle, leaked_ref) = unsafe { self.leak_immutable(py)? }; DirstateMapItemsIterator::from_inner( py, Some(leak_handle), @@ -337,7 +337,7 @@ py_class!(pub class DirstateMap |py| { } def __iter__(&self) -> PyResult<DirstateMapKeysIterator> { - let (leak_handle, leaked_ref) = self.leak_immutable(py)?; + let (leak_handle, leaked_ref) = unsafe { self.leak_immutable(py)? }; DirstateMapKeysIterator::from_inner( py, Some(leak_handle), @@ -434,7 +434,7 @@ py_class!(pub class DirstateMap |py| { } def copymapiter(&self) -> PyResult<CopyMapKeysIterator> { - let (leak_handle, leaked_ref) = self.leak_immutable(py)?; + let (leak_handle, leaked_ref) = unsafe { self.leak_immutable(py)? }; CopyMapKeysIterator::from_inner( py, Some(leak_handle), @@ -443,7 +443,7 @@ py_class!(pub class DirstateMap |py| { } def copymapitemsiter(&self) -> PyResult<CopyMapItemsIterator> { - let (leak_handle, leaked_ref) = self.leak_immutable(py)?; + let (leak_handle, leaked_ref) = unsafe { self.leak_immutable(py)? }; CopyMapItemsIterator::from_inner( py, Some(leak_handle), diff --git a/rust/hg-cpython/src/ref_sharing.rs b/rust/hg-cpython/src/ref_sharing.rs --- a/rust/hg-cpython/src/ref_sharing.rs +++ b/rust/hg-cpython/src/ref_sharing.rs @@ -58,7 +58,12 @@ impl PySharedState { /// Return a reference to the wrapped data with an artificial static /// lifetime. /// We need to be protected by the GIL for thread-safety. - pub fn leak_immutable<T>( + /// + /// # Safety + /// + /// This is highly unsafe since the lifetime of the given data can be + /// extended. Do not call this function directly. + pub unsafe fn leak_immutable<T>( &self, py: Python, data: &PySharedRefCell<T>, @@ -72,10 +77,14 @@ impl PySharedState { } let ptr = data.as_ptr(); self.leak_count.replace(self.leak_count.get() + 1); - unsafe { Ok(&*ptr) } + Ok(&*ptr) } - pub fn decrease_leak_count(&self, _py: Python, mutable: bool) { + /// # Safety + /// + /// It's unsafe to update the reference count without knowing the + /// reference is deleted. Do not call this function directly. + pub unsafe fn decrease_leak_count(&self, _py: Python, mutable: bool) { self.leak_count .replace(self.leak_count.get().saturating_sub(1)); if mutable { @@ -123,6 +132,8 @@ pub struct PyRefMut<'a, T> { } impl<'a, T> PyRefMut<'a, T> { + // Must be constructed by PySharedState after checking its leak_count. + // Otherwise, drop() would incorrectly update the state. fn new( _py: Python<'a>, inner: RefMut<'a, T>, @@ -152,7 +163,9 @@ impl<'a, T> Drop for PyRefMut<'a, T> { fn drop(&mut self) { let gil = Python::acquire_gil(); let py = gil.python(); - self.py_shared_state.decrease_leak_count(py, true); + unsafe { + self.py_shared_state.decrease_leak_count(py, true); + } } } @@ -223,7 +236,7 @@ macro_rules! py_shared_ref { /// It's up to you to make sure that the management object lives /// longer than the leaked reference. Otherwise, you'll get a /// dangling reference. - fn leak_immutable<'a>( + unsafe fn leak_immutable<'a>( &'a self, py: Python<'a>, ) -> PyResult<($leaked, &'static $inner_struct)> { @@ -247,7 +260,9 @@ macro_rules! py_shared_ref { } impl $leaked { - fn new(py: Python, inner: &$name) -> Self { + // Marked as unsafe so client code wouldn't construct $leaked + // struct by mistake. Its drop() is unsafe. + unsafe fn new(py: Python, inner: &$name) -> Self { Self { inner: inner.clone_ref(py), } @@ -258,9 +273,10 @@ macro_rules! py_shared_ref { fn drop(&mut self) { let gil = Python::acquire_gil(); let py = gil.python(); - self.inner - .py_shared_state(py) - .decrease_leak_count(py, false); + let state = self.inner.py_shared_state(py); + unsafe { + state.decrease_leak_count(py, false); + } } } }; @@ -346,7 +362,7 @@ macro_rules! py_shared_iterator_impl { /// data py_shared_state: PySharedState; /// /// def __iter__(&self) -> PyResult<MyTypeItemsIterator> { -/// let (leak_handle, leaked_ref) = self.leak_immutable(py)?; +/// let (leak_handle, leaked_ref) = unsafe { self.leak_immutable(py)? }; /// MyTypeItemsIterator::create_instance( /// py, /// RefCell::new(Some(leak_handle)),