Patchwork D7796: rust-nodemap: input/output primitives

login
register
mail settings
Submitter phabricator
Date Feb. 14, 2020, 11:02 a.m.
Message ID <4f6424e4a2a48bd36fe9f42db751da15@localhost.localdomain>
Download mbox | patch
Permalink /patch/45230/
State Not Applicable
Headers show

Comments

phabricator - Feb. 14, 2020, 11:02 a.m.
Alphare updated this revision to Diff 20215.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7796?vs=20025&id=20215

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7796/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7796

AFFECTED FILES
  rust/hg-core/src/revlog/nodemap.rs

CHANGE DETAILS




To: gracinet, #hg-reviewers, kevincox, durin42
Cc: Alphare, marmoute, durin42, kevincox, mercurial-devel
Yuya Nishihara - Feb. 14, 2020, 1:17 p.m.
> +    ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) {
> +        let (readonly, vec) = self.into_readonly_and_added();
> +        // Prevent running `v`'s destructor so we are in complete control
> +        // of the allocation.
> +        let vec = mem::ManuallyDrop::new(vec);
> +
> +        let bytes = unsafe {
> +            // This is safe because we check at compile-time that there is no
> +            // padding.
> +            // /!\ Any use of `vec` after this is use-after-free.
> +
> +            let _: [u8; 4 * BLOCK_SIZE] =
> +                std::mem::transmute([Block::new(); 4]);
> +            Vec::from_raw_parts(
> +                vec.as_ptr() as *mut u8,
> +                vec.len() * BLOCK_SIZE,
> +                vec.capacity() * BLOCK_SIZE,
> +            )

Appears that this is unsafe. The doc states that the source type must have the
exact same alignment as `Vec<u8>` probably because the allocator may use
separate bucket per alignment.

https://doc.rust-lang.org/std/vec/struct.Vec.html#method.from_raw_parts

"It's also not safe to build one from a Vec<u16> and its length, because the
allocator cares about the alignment, and these two types have different alignments."

Can't we instead implement `as_bytes() -> &[u8]`?
phabricator - Feb. 14, 2020, 1:19 p.m.
yuja added a comment.


  > +    ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) {
  > +        let (readonly, vec) = self.into_readonly_and_added();
  > +        // Prevent running `v`'s destructor so we are in complete control
  > +        // of the allocation.
  > +        let vec = mem::ManuallyDrop::new(vec);
  > +
  > +        let bytes = unsafe {
  > +            // This is safe because we check at compile-time that there is no
  > +            // padding.
  > +            // /!\ Any use of `vec` after this is use-after-free.
  > +
  > +            let _: [u8; 4 * BLOCK_SIZE] =
  > +                std::mem::transmute([Block::new(); 4]);
  > +            Vec::from_raw_parts(
  > +                vec.as_ptr() as *mut u8,
  > +                vec.len() * BLOCK_SIZE,
  > +                vec.capacity() * BLOCK_SIZE,
  > +            )
  
  Appears that this is unsafe. The doc states that the source type must have the
  exact same alignment as `Vec<u8>` probably because the allocator may use
  separate bucket per alignment.
  
  https://doc.rust-lang.org/std/vec/struct.Vec.html#method.from_raw_parts
  
  "It's also not safe to build one from a Vec<u16> and its length, because the
  allocator cares about the alignment, and these two types have different alignments."
  
  Can't we instead implement `as_bytes() -> &[u8]`?

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7796/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7796

To: gracinet, #hg-reviewers, kevincox, durin42
Cc: yuja, Alphare, marmoute, durin42, kevincox, mercurial-devel

Patch

diff --git a/rust/hg-core/src/revlog/nodemap.rs b/rust/hg-core/src/revlog/nodemap.rs
--- a/rust/hg-core/src/revlog/nodemap.rs
+++ b/rust/hg-core/src/revlog/nodemap.rs
@@ -17,8 +17,10 @@ 
 };
 
 use std::fmt;
+use std::mem;
 use std::ops::Deref;
 use std::ops::Index;
+use std::slice;
 
 #[derive(Debug, PartialEq)]
 pub enum NodeMapError {
@@ -172,9 +174,11 @@ 
 /// represented at all, because we want an immutable empty nodetree
 /// to be valid.
 
-#[derive(Clone, PartialEq)]
+#[derive(Copy, Clone, PartialEq)]
 pub struct Block([RawElement; 16]);
 
+pub const BLOCK_SIZE: usize = mem::size_of::<Block>();
+
 impl Block {
     fn new() -> Self {
         Block([-1; 16])
@@ -262,6 +266,65 @@ 
         }
     }
 
+    /// Create from an opaque bunch of bytes
+    ///
+    /// The created `NodeTreeBytes` from `buffer`,
+    /// of which exactly `amount` bytes are used.
+    ///
+    /// - `buffer` could be derived from `PyBuffer` and `Mmap` objects.
+    /// - `offset` allows for the final file format to include fixed data
+    ///   (generation number, behavioural flags)
+    /// - `amount` is expressed in bytes, and is not automatically derived from
+    ///   `bytes`, so that a caller that manages them atomically can perform
+    ///   temporary disk serializations and still rollback easily if needed.
+    ///   First use-case for this would be to support Mercurial shell hooks.
+    ///
+    /// panics if `buffer` is smaller than `amount`
+    pub fn load_bytes(
+        bytes: Box<dyn Deref<Target = [u8]> + Send>,
+        amount: usize,
+    ) -> Self {
+        NodeTree::new(Box::new(NodeTreeBytes::new(bytes, amount)))
+    }
+
+    /// Retrieve added `Block` and the original immutable data
+    pub fn into_readonly_and_added(
+        self,
+    ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<Block>) {
+        let mut vec = self.growable;
+        let readonly = self.readonly;
+        if readonly.last() != Some(&self.root) {
+            vec.push(self.root);
+        }
+        (readonly, vec)
+    }
+
+    /// Retrieve added `Blocks` as bytes, ready to be written to persistent
+    /// storage
+    pub fn into_readonly_and_added_bytes(
+        self,
+    ) -> (Box<dyn Deref<Target = [Block]> + Send>, Vec<u8>) {
+        let (readonly, vec) = self.into_readonly_and_added();
+        // Prevent running `v`'s destructor so we are in complete control
+        // of the allocation.
+        let vec = mem::ManuallyDrop::new(vec);
+
+        let bytes = unsafe {
+            // This is safe because we check at compile-time that there is no
+            // padding.
+            // /!\ Any use of `vec` after this is use-after-free.
+
+            let _: [u8; 4 * BLOCK_SIZE] =
+                std::mem::transmute([Block::new(); 4]);
+            Vec::from_raw_parts(
+                vec.as_ptr() as *mut u8,
+                vec.len() * BLOCK_SIZE,
+                vec.capacity() * BLOCK_SIZE,
+            )
+        };
+        (readonly, bytes)
+    }
+
     /// Total number of blocks
     fn len(&self) -> usize {
         self.readonly.len() + self.growable.len() + 1
@@ -410,6 +473,38 @@ 
     }
 }
 
+pub struct NodeTreeBytes {
+    buffer: Box<dyn Deref<Target = [u8]> + Send>,
+    len_in_blocks: usize,
+}
+
+impl NodeTreeBytes {
+    fn new(
+        buffer: Box<dyn Deref<Target = [u8]> + Send>,
+        amount: usize,
+    ) -> Self {
+        assert!(buffer.len() >= amount);
+        let len_in_blocks = amount / BLOCK_SIZE;
+        NodeTreeBytes {
+            buffer,
+            len_in_blocks,
+        }
+    }
+}
+
+impl Deref for NodeTreeBytes {
+    type Target = [Block];
+
+    fn deref(&self) -> &[Block] {
+        unsafe {
+            slice::from_raw_parts(
+                (&self.buffer).as_ptr() as *const Block,
+                self.len_in_blocks,
+            )
+        }
+    }
+}
+
 struct NodeTreeVisitor<'n, 'p> {
     nt: &'n NodeTree,
     prefix: NodePrefixRef<'p>,
@@ -787,4 +882,30 @@ 
 
         Ok(())
     }
+
+    #[test]
+    fn test_into_added_empty() {
+        assert!(sample_nodetree().into_readonly_and_added().1.is_empty());
+        assert!(sample_nodetree()
+            .into_readonly_and_added_bytes()
+            .1
+            .is_empty());
+    }
+
+    #[test]
+    fn test_into_added_bytes() -> Result<(), NodeMapError> {
+        let mut idx = TestNtIndex::new();
+        idx.insert(0, "1234")?;
+        let mut idx = idx.commit();
+        idx.insert(4, "cafe")?;
+        let (_, bytes) = idx.nt.into_readonly_and_added_bytes();
+
+        // only the root block has been changed
+        assert_eq!(bytes.len(), BLOCK_SIZE);
+        // big endian for -2
+        assert_eq!(&bytes[4..2 * 4], [255, 255, 255, 254]);
+        // big endian for -6
+        assert_eq!(&bytes[12 * 4..13 * 4], [255, 255, 255, 250]);
+        Ok(())
+    }
 }