Patchwork D6628: rust-parsers: switch to parse/pack_dirstate to mutate-on-loop

login
register
mail settings
Submitter phabricator
Date July 10, 2019, 11:48 a.m.
Message ID <differential-rev-PHID-DREV-ja5f6v3vmxdhebb5nqlj-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/40862/
State New
Headers show

Comments

phabricator - July 10, 2019, 11:48 a.m.
Alphare created this revision.
Herald added subscribers: mercurial-devel, mjpieters, kevincox, durin42.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  Both `parse_dirstate` and `pack_dirstate` can operate directly on the data
  they're passed, which prevents the creation of intermediate data structures,
  simplifies the function signatures and reduces boilerplate. They are exposed
  directly to the Python for now, but a later patch will make use of them inside
  `hg-core`.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D6628

AFFECTED FILES
  rust/hg-core/src/dirstate/dirs_multiset.rs
  rust/hg-core/src/dirstate/mod.rs
  rust/hg-core/src/dirstate/parsers.rs
  rust/hg-core/src/lib.rs
  rust/hg-cpython/src/dirstate/dirs_multiset.rs
  rust/hg-cpython/src/dirstate/mod.rs
  rust/hg-cpython/src/parsers.rs

CHANGE DETAILS




To: Alphare, #hg-reviewers
Cc: durin42, kevincox, mjpieters, mercurial-devel
phabricator - July 15, 2019, 4:24 p.m.
kevincox added inline comments.
kevincox accepted this revision.

INLINE COMMENTS

> mod.rs:16
> +    pub p1: Vec<u8>,
> +    pub p2: Vec<u8>,
>  }

If these are always 20 bytes long you are best to use `[u8; 20]`. That way there will be no indirection. And since `Vec` is 3 words it will have a bigger immediate size on 64bit platforms.

> parsers.rs:81
>      parents: DirstateParents,
> -    now: i32,
> -) -> Result<(Vec<u8>, DirstateVec), DirstatePackError> {
> +    now: Duration,
> +) -> Result<Vec<u8>, DirstatePackError> {

The "proper" solution is SystemTime <https://doc.rust-lang.org/std/time/struct.SystemTime.html> however I will admit that it is a bit more awkward to use. However if you are going to use Duration please document that it is the Duration since the Unix Epoch.

> parsers.rs:87
>  
> -    let expected_size: usize = dirstate_vec
> +    let now = now.as_secs() as i32;
> +

Would it be best to panic on overflow? `now.as_secs().try_into::<i32>().expect("time overflow")`.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D6628/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D6628

To: Alphare, #hg-reviewers, kevincox
Cc: durin42, kevincox, mjpieters, mercurial-devel

Patch

diff --git a/rust/hg-cpython/src/parsers.rs b/rust/hg-cpython/src/parsers.rs
--- a/rust/hg-cpython/src/parsers.rs
+++ b/rust/hg-cpython/src/parsers.rs
@@ -12,17 +12,18 @@ 
 //!
 use cpython::{
     exc, PyBytes, PyDict, PyErr, PyInt, PyModule, PyResult, PyTuple, Python,
-    ToPyObject, PythonObject
+    ToPyObject,
 };
 use hg::{
-    pack_dirstate, parse_dirstate, CopyVecEntry, DirstateEntry,
-    DirstatePackError, DirstateParents, DirstateParseError,
+    pack_dirstate, parse_dirstate, DirstateEntry, DirstatePackError,
+    DirstateParents, DirstateParseError,
 };
 use std::collections::HashMap;
 
 use libc::c_char;
 
-use dirstate::{decapsule_make_dirstate_tuple, extract_dirstate_vec};
+use dirstate::{decapsule_make_dirstate_tuple, extract_dirstate};
+use std::time::Duration;
 
 fn parse_dirstate_wrapper(
     py: Python,
@@ -30,12 +31,15 @@ 
     copymap: PyDict,
     st: PyBytes,
 ) -> PyResult<PyTuple> {
-    match parse_dirstate(st.data(py)) {
-        Ok((parents, dirstate_vec, copies)) => {
-            for (filename, entry) in dirstate_vec {
+    let mut dirstate_map = HashMap::new();
+    let mut copies = HashMap::new();
+
+    match parse_dirstate(&mut dirstate_map, &mut copies, st.data(py)) {
+        Ok(parents) => {
+            for (filename, entry) in dirstate_map {
                 dmap.set_item(
                     py,
-                    PyBytes::new(py, &filename[..]),
+                    PyBytes::new(py, &filename),
                     decapsule_make_dirstate_tuple(py)?(
                         entry.state as c_char,
                         entry.mode,
@@ -44,15 +48,17 @@ 
                     ),
                 )?;
             }
-            for CopyVecEntry { path, copy_path } in copies {
+            for (path, copy_path) in copies {
                 copymap.set_item(
                     py,
-                    PyBytes::new(py, path),
-                    PyBytes::new(py, copy_path),
+                    PyBytes::new(py, &path),
+                    PyBytes::new(py, &copy_path),
                 )?;
             }
-            Ok((PyBytes::new(py, parents.p1), PyBytes::new(py, parents.p2))
-                .to_py_object(py))
+            Ok(
+                (PyBytes::new(py, &parents.p1), PyBytes::new(py, &parents.p2))
+                    .to_py_object(py),
+            )
         }
         Err(e) => Err(PyErr::new::<exc::ValueError, _>(
             py,
@@ -64,6 +70,9 @@ 
                     "overflow in dirstate".to_string()
                 }
                 DirstateParseError::CorruptedEntry(e) => e,
+                DirstateParseError::Damaged => {
+                    "dirstate appears to be damaged".to_string()
+                }
             },
         )),
     }
@@ -81,7 +90,7 @@ 
     let p2 = pl.get_item(py, 1).extract::<PyBytes>(py)?;
     let p2: &[u8] = p2.data(py);
 
-    let dirstate_vec = extract_dirstate_vec(py, &dmap)?;
+    let mut dirstate_map = extract_dirstate(py, &dmap)?;
 
     let copies: Result<HashMap<Vec<u8>, Vec<u8>>, PyErr> = copymap
         .items(py)
@@ -95,12 +104,15 @@ 
         .collect();
 
     match pack_dirstate(
-        &dirstate_vec,
+        &mut dirstate_map,
         &copies?,
-        DirstateParents { p1, p2 },
-        now.as_object().extract::<i32>(py)?,
+        DirstateParents {
+            p1: p1.to_owned(),
+            p2: p2.to_owned(),
+        },
+        Duration::from_secs(now.value(py) as u64),
     ) {
-        Ok((packed, new_dirstate_vec)) => {
+        Ok(packed) => {
             for (
                 filename,
                 DirstateEntry {
@@ -109,7 +121,7 @@ 
                     size,
                     mtime,
                 },
-            ) in new_dirstate_vec
+            ) in dirstate_map
             {
                 dmap.set_item(
                     py,
diff --git a/rust/hg-cpython/src/dirstate/mod.rs b/rust/hg-cpython/src/dirstate/mod.rs
--- a/rust/hg-cpython/src/dirstate/mod.rs
+++ b/rust/hg-cpython/src/dirstate/mod.rs
@@ -13,7 +13,7 @@ 
 use cpython::{
     PyBytes, PyDict, PyErr, PyModule, PyObject, PyResult, PySequence, Python,
 };
-use hg::{DirstateEntry, DirstateVec};
+use hg::{DirstateEntry, StateMap};
 use std::ffi::CStr;
 
 #[cfg(feature = "python27")]
@@ -59,10 +59,7 @@ 
     }
 }
 
-pub fn extract_dirstate_vec(
-    py: Python,
-    dmap: &PyDict,
-) -> Result<DirstateVec, PyErr> {
+pub fn extract_dirstate(py: Python, dmap: &PyDict) -> Result<StateMap, PyErr> {
     dmap.items(py)
         .iter()
         .map(|(filename, stats)| {
diff --git a/rust/hg-cpython/src/dirstate/dirs_multiset.rs b/rust/hg-cpython/src/dirstate/dirs_multiset.rs
--- a/rust/hg-cpython/src/dirstate/dirs_multiset.rs
+++ b/rust/hg-cpython/src/dirstate/dirs_multiset.rs
@@ -15,7 +15,7 @@ 
     ToPyObject,
 };
 
-use dirstate::extract_dirstate_vec;
+use dirstate::extract_dirstate;
 use hg::{DirsIterable, DirsMultiset, DirstateMapError};
 
 py_class!(pub class Dirs |py| {
@@ -32,12 +32,10 @@ 
         if let Some(skip) = skip {
             skip_state = Some(skip.extract::<PyBytes>(py)?.data(py)[0] as i8);
         }
-        let dirs_map;
-
-        if let Ok(map) = map.cast_as::<PyDict>(py) {
-            let dirstate_vec = extract_dirstate_vec(py, &map)?;
-            dirs_map = DirsMultiset::new(
-                DirsIterable::Dirstate(dirstate_vec),
+        let inner = if let Ok(map) = map.cast_as::<PyDict>(py) {
+            let dirstate = extract_dirstate(py, &map)?;
+            DirsMultiset::new(
+                DirsIterable::Dirstate(&dirstate),
                 skip_state,
             )
         } else {
@@ -45,13 +43,13 @@ 
                 .iter(py)?
                 .map(|o| Ok(o?.extract::<PyBytes>(py)?.data(py).to_owned()))
                 .collect();
-            dirs_map = DirsMultiset::new(
-                DirsIterable::Manifest(map?),
+            DirsMultiset::new(
+                DirsIterable::Manifest(&map?),
                 skip_state,
             )
-        }
+        };
 
-        Self::create_instance(py, RefCell::new(dirs_map))
+        Self::create_instance(py, RefCell::new(inner))
     }
 
     def addpath(&self, path: PyObject) -> PyResult<PyObject> {
diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs
--- a/rust/hg-core/src/lib.rs
+++ b/rust/hg-core/src/lib.rs
@@ -17,8 +17,7 @@ 
 pub use dirstate::{
     dirs_multiset::DirsMultiset,
     parsers::{pack_dirstate, parse_dirstate},
-    CopyVec, CopyVecEntry, DirsIterable, DirstateEntry, DirstateParents,
-    DirstateVec,
+    CopyMap, DirsIterable, DirstateEntry, DirstateParents, StateMap,
 };
 mod filepatterns;
 mod utils;
@@ -66,6 +65,7 @@ 
     TooLittleData,
     Overflow,
     CorruptedEntry(String),
+    Damaged,
 }
 
 #[derive(Debug, PartialEq)]
diff --git a/rust/hg-core/src/dirstate/parsers.rs b/rust/hg-core/src/dirstate/parsers.rs
--- a/rust/hg-core/src/dirstate/parsers.rs
+++ b/rust/hg-core/src/dirstate/parsers.rs
@@ -4,31 +4,31 @@ 
 // GNU General Public License version 2 or any later version.
 
 use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
-use std::collections::HashMap;
+use dirstate::{CopyMap, StateMap};
 use std::io::Cursor;
-use {
-    CopyVec, CopyVecEntry, DirstateEntry, DirstatePackError, DirstateParents,
-    DirstateParseError, DirstateVec,
-};
+use std::time::Duration;
+use {DirstateEntry, DirstatePackError, DirstateParents, DirstateParseError};
 
 /// Parents are stored in the dirstate as byte hashes.
 const PARENT_SIZE: usize = 20;
 /// Dirstate entries have a static part of 8 + 32 + 32 + 32 + 32 bits.
 const MIN_ENTRY_SIZE: usize = 17;
 
+// TODO parse/pack: is mutate-on-loop better for performance?
+
 pub fn parse_dirstate(
+    state_map: &mut StateMap,
+    copy_map: &mut CopyMap,
     contents: &[u8],
-) -> Result<(DirstateParents, DirstateVec, CopyVec), DirstateParseError> {
+) -> Result<DirstateParents, DirstateParseError> {
     if contents.len() < PARENT_SIZE * 2 {
         return Err(DirstateParseError::TooLittleData);
     }
 
-    let mut dirstate_vec = vec![];
-    let mut copies = vec![];
     let mut curr_pos = PARENT_SIZE * 2;
     let parents = DirstateParents {
-        p1: &contents[..PARENT_SIZE],
-        p2: &contents[PARENT_SIZE..curr_pos],
+        p1: contents[..PARENT_SIZE].to_vec(),
+        p2: contents[PARENT_SIZE..curr_pos].to_vec(),
     };
 
     while curr_pos < contents.len() {
@@ -57,9 +57,9 @@ 
         };
 
         if let Some(copy_path) = copy {
-            copies.push(CopyVecEntry { path, copy_path });
+            copy_map.insert(path.to_owned(), copy_path.to_owned());
         };
-        dirstate_vec.push((
+        state_map.insert(
             path.to_owned(),
             DirstateEntry {
                 state,
@@ -67,28 +67,30 @@ 
                 size,
                 mtime,
             },
-        ));
+        );
         curr_pos = curr_pos + MIN_ENTRY_SIZE + (path_len);
     }
 
-    Ok((parents, dirstate_vec, copies))
+    Ok(parents)
 }
 
 pub fn pack_dirstate(
-    dirstate_vec: &DirstateVec,
-    copymap: &HashMap<Vec<u8>, Vec<u8>>,
+    state_map: &mut StateMap,
+    copy_map: &CopyMap,
     parents: DirstateParents,
-    now: i32,
-) -> Result<(Vec<u8>, DirstateVec), DirstatePackError> {
+    now: Duration,
+) -> Result<Vec<u8>, DirstatePackError> {
     if parents.p1.len() != PARENT_SIZE || parents.p2.len() != PARENT_SIZE {
         return Err(DirstatePackError::CorruptedParent);
     }
 
-    let expected_size: usize = dirstate_vec
+    let now = now.as_secs() as i32;
+
+    let expected_size: usize = state_map
         .iter()
-        .map(|(ref filename, _)| {
+        .map(|(filename, _)| {
             let mut length = MIN_ENTRY_SIZE + filename.len();
-            if let Some(ref copy) = copymap.get(filename) {
+            if let Some(ref copy) = copy_map.get(filename) {
                 length += copy.len() + 1;
             }
             length
@@ -97,13 +99,13 @@ 
     let expected_size = expected_size + PARENT_SIZE * 2;
 
     let mut packed = Vec::with_capacity(expected_size);
-    let mut new_dirstate_vec = vec![];
+    let mut new_state_map = vec![];
 
     packed.extend(parents.p1);
     packed.extend(parents.p2);
 
-    for (ref filename, entry) in dirstate_vec {
-        let mut new_filename: Vec<u8> = filename.to_owned();
+    for (ref filename, entry) in state_map.iter() {
+        let mut new_filename: Vec<u8> = filename.to_vec();
         let mut new_mtime: i32 = entry.mtime;
         if entry.state == 'n' as i8 && entry.mtime == now.into() {
             // The file was last modified "simultaneously" with the current
@@ -116,8 +118,8 @@ 
             // contents of the file if the size is the same. This prevents
             // mistakenly treating such files as clean.
             new_mtime = -1;
-            new_dirstate_vec.push((
-                filename.to_owned(),
+            new_state_map.push((
+                filename.to_owned().to_vec(),
                 DirstateEntry {
                     mtime: new_mtime,
                     ..*entry
@@ -125,7 +127,7 @@ 
             ));
         }
 
-        if let Some(copy) = copymap.get(filename) {
+        if let Some(copy) = copy_map.get(*filename) {
             new_filename.push('\0' as u8);
             new_filename.extend(copy);
         }
@@ -142,66 +144,36 @@ 
         return Err(DirstatePackError::BadSize(expected_size, packed.len()));
     }
 
-    Ok((packed, new_dirstate_vec))
+    state_map.extend(new_state_map);
+
+    Ok(packed)
 }
-
 #[cfg(test)]
 mod tests {
     use super::*;
+    use std::collections::HashMap;
 
     #[test]
     fn test_pack_dirstate_empty() {
-        let dirstate_vec: DirstateVec = vec![];
+        let mut state_map: StateMap = HashMap::new();
         let copymap = HashMap::new();
         let parents = DirstateParents {
-            p1: b"12345678910111213141",
-            p2: b"00000000000000000000",
+            p1: b"12345678910111213141".to_vec(),
+            p2: b"00000000000000000000".to_vec(),
         };
-        let now: i32 = 15000000;
-        let expected =
-            (b"1234567891011121314100000000000000000000".to_vec(), vec![]);
+        let now = Duration::new(15000000, 0);
+        let expected = b"1234567891011121314100000000000000000000".to_vec();
 
         assert_eq!(
             expected,
-            pack_dirstate(&dirstate_vec, &copymap, parents, now).unwrap()
+            pack_dirstate(&mut state_map, &copymap, parents, now).unwrap()
         );
+
+        assert!(state_map.is_empty())
     }
     #[test]
     fn test_pack_dirstate_one_entry() {
-        let dirstate_vec: DirstateVec = vec![(
-            vec!['f' as u8, '1' as u8],
-            DirstateEntry {
-                state: 'n' as i8,
-                mode: 0o644,
-                size: 0,
-                mtime: 791231220,
-            },
-        )];
-        let copymap = HashMap::new();
-        let parents = DirstateParents {
-            p1: b"12345678910111213141",
-            p2: b"00000000000000000000",
-        };
-        let now: i32 = 15000000;
-        let expected = (
-            [
-                49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50,
-                49, 51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
-                48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0,
-                0, 0, 0, 47, 41, 58, 244, 0, 0, 0, 2, 102, 49,
-            ]
-            .to_vec(),
-            vec![],
-        );
-
-        assert_eq!(
-            expected,
-            pack_dirstate(&dirstate_vec, &copymap, parents, now).unwrap()
-        );
-    }
-    #[test]
-    fn test_pack_dirstate_one_entry_with_copy() {
-        let dirstate_vec: DirstateVec = vec![(
+        let expected_state_map: StateMap = [(
             b"f1".to_vec(),
             DirstateEntry {
                 state: 'n' as i8,
@@ -209,35 +181,36 @@ 
                 size: 0,
                 mtime: 791231220,
             },
-        )];
-        let mut copymap = HashMap::new();
-        copymap.insert(b"f1".to_vec(), b"copyname".to_vec());
+        )]
+        .iter()
+        .cloned()
+        .collect();
+        let mut state_map = expected_state_map.clone();
+
+        let copymap = HashMap::new();
         let parents = DirstateParents {
-            p1: b"12345678910111213141",
-            p2: b"00000000000000000000",
+            p1: b"12345678910111213141".to_vec(),
+            p2: b"00000000000000000000".to_vec(),
         };
-        let now: i32 = 15000000;
-        let expected = (
-            [
-                49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50,
-                49, 51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
-                48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0,
-                0, 0, 0, 47, 41, 58, 244, 0, 0, 0, 11, 102, 49, 0, 99, 111,
-                112, 121, 110, 97, 109, 101,
-            ]
-            .to_vec(),
-            vec![],
-        );
+        let now = Duration::new(15000000, 0);
+        let expected = [
+            49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50, 49,
+            51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+            48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0, 0, 0, 0, 47,
+            41, 58, 244, 0, 0, 0, 2, 102, 49,
+        ]
+        .to_vec();
 
         assert_eq!(
             expected,
-            pack_dirstate(&dirstate_vec, &copymap, parents, now).unwrap()
+            pack_dirstate(&mut state_map, &copymap, parents, now).unwrap()
         );
-    }
 
+        assert_eq!(expected_state_map, state_map);
+    }
     #[test]
-    fn test_parse_pack_one_entry_with_copy() {
-        let dirstate_vec: DirstateVec = vec![(
+    fn test_pack_dirstate_one_entry_with_copy() {
+        let expected_state_map: StateMap = [(
             b"f1".to_vec(),
             DirstateEntry {
                 state: 'n' as i8,
@@ -245,36 +218,76 @@ 
                 size: 0,
                 mtime: 791231220,
             },
-        )];
+        )]
+        .iter()
+        .cloned()
+        .collect();
+        let mut state_map = expected_state_map.clone();
         let mut copymap = HashMap::new();
         copymap.insert(b"f1".to_vec(), b"copyname".to_vec());
         let parents = DirstateParents {
-            p1: b"12345678910111213141",
-            p2: b"00000000000000000000",
+            p1: b"12345678910111213141".to_vec(),
+            p2: b"00000000000000000000".to_vec(),
         };
-        let now: i32 = 15000000;
-        let result =
-            pack_dirstate(&dirstate_vec, &copymap, parents, now).unwrap();
+        let now = Duration::new(15000000, 0);
+        let expected = [
+            49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50, 49,
+            51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+            48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0, 0, 0, 0, 47,
+            41, 58, 244, 0, 0, 0, 11, 102, 49, 0, 99, 111, 112, 121, 110, 97,
+            109, 101,
+        ]
+        .to_vec();
 
         assert_eq!(
-            (
-                parents,
-                dirstate_vec,
-                copymap
-                    .iter()
-                    .map(|(k, v)| CopyVecEntry {
-                        path: k.as_slice(),
-                        copy_path: v.as_slice()
-                    })
-                    .collect()
-            ),
-            parse_dirstate(result.0.as_slice()).unwrap()
+            expected,
+            pack_dirstate(&mut state_map, &copymap, parents, now).unwrap()
+        );
+        assert_eq!(expected_state_map, state_map);
+    }
+
+    #[test]
+    fn test_parse_pack_one_entry_with_copy() {
+        let mut state_map: StateMap = [(
+            b"f1".to_vec(),
+            DirstateEntry {
+                state: 'n' as i8,
+                mode: 0o644,
+                size: 0,
+                mtime: 791231220,
+            },
+        )]
+        .iter()
+        .cloned()
+        .collect();
+        let mut copymap = HashMap::new();
+        copymap.insert(b"f1".to_vec(), b"copyname".to_vec());
+        let parents = DirstateParents {
+            p1: b"12345678910111213141".to_vec(),
+            p2: b"00000000000000000000".to_vec(),
+        };
+        let now = Duration::new(15000000, 0);
+        let result =
+            pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
+                .unwrap();
+
+        let mut new_state_map: StateMap = HashMap::new();
+        let mut new_copy_map: CopyMap = HashMap::new();
+        let new_parents = parse_dirstate(
+            &mut new_state_map,
+            &mut new_copy_map,
+            result.as_slice(),
+        )
+        .unwrap();
+        assert_eq!(
+            (parents, state_map, copymap),
+            (new_parents, new_state_map, new_copy_map)
         )
     }
 
     #[test]
     fn test_parse_pack_multiple_entries_with_copy() {
-        let dirstate_vec: DirstateVec = vec![
+        let mut state_map: StateMap = [
             (
                 b"f1".to_vec(),
                 DirstateEntry {
@@ -311,39 +324,40 @@ 
                     mtime: -1,
                 },
             ),
-        ];
+        ]
+        .iter()
+        .cloned()
+        .collect();
         let mut copymap = HashMap::new();
         copymap.insert(b"f1".to_vec(), b"copyname".to_vec());
         copymap.insert(b"f4\xF6".to_vec(), b"copyname2".to_vec());
         let parents = DirstateParents {
-            p1: b"12345678910111213141",
-            p2: b"00000000000000000000",
+            p1: b"12345678910111213141".to_vec(),
+            p2: b"00000000000000000000".to_vec(),
         };
-        let now: i32 = 15000000;
+        let now = Duration::new(15000000, 0);
         let result =
-            pack_dirstate(&dirstate_vec, &copymap, parents, now).unwrap();
+            pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
+                .unwrap();
 
+        let mut new_state_map: StateMap = HashMap::new();
+        let mut new_copy_map: CopyMap = HashMap::new();
+        let new_parents = parse_dirstate(
+            &mut new_state_map,
+            &mut new_copy_map,
+            result.as_slice(),
+        )
+        .unwrap();
         assert_eq!(
-            (parents, dirstate_vec, copymap),
-            parse_dirstate(result.0.as_slice())
-                .and_then(|(p, dvec, cvec)| Ok((
-                    p,
-                    dvec,
-                    cvec.iter()
-                        .map(|entry| (
-                            entry.path.to_vec(),
-                            entry.copy_path.to_vec()
-                        ))
-                        .collect()
-                )))
-                .unwrap()
+            (parents, state_map, copymap),
+            (new_parents, new_state_map, new_copy_map)
         )
     }
 
     #[test]
     /// https://www.mercurial-scm.org/repo/hg/rev/af3f26b6bba4
     fn test_parse_pack_one_entry_with_copy_and_time_conflict() {
-        let dirstate_vec: DirstateVec = vec![(
+        let mut state_map: StateMap = [(
             b"f1".to_vec(),
             DirstateEntry {
                 state: 'n' as i8,
@@ -351,21 +365,34 @@ 
                 size: 0,
                 mtime: 15000000,
             },
-        )];
+        )]
+        .iter()
+        .cloned()
+        .collect();
         let mut copymap = HashMap::new();
         copymap.insert(b"f1".to_vec(), b"copyname".to_vec());
         let parents = DirstateParents {
-            p1: b"12345678910111213141",
-            p2: b"00000000000000000000",
+            p1: b"12345678910111213141".to_vec(),
+            p2: b"00000000000000000000".to_vec(),
         };
-        let now: i32 = 15000000;
+        let now = Duration::new(15000000, 0);
         let result =
-            pack_dirstate(&dirstate_vec, &copymap, parents, now).unwrap();
+            pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
+                .unwrap();
+
+        let mut new_state_map: StateMap = HashMap::new();
+        let mut new_copy_map: CopyMap = HashMap::new();
+        let new_parents = parse_dirstate(
+            &mut new_state_map,
+            &mut new_copy_map,
+            result.as_slice(),
+        )
+        .unwrap();
 
         assert_eq!(
             (
                 parents,
-                vec![(
+                [(
                     b"f1".to_vec(),
                     DirstateEntry {
                         state: 'n' as i8,
@@ -373,16 +400,13 @@ 
                         size: 0,
                         mtime: -1
                     }
-                )],
-                copymap
-                    .iter()
-                    .map(|(k, v)| CopyVecEntry {
-                        path: k.as_slice(),
-                        copy_path: v.as_slice()
-                    })
-                    .collect()
+                )]
+                .iter()
+                .cloned()
+                .collect::<StateMap>(),
+                copymap,
             ),
-            parse_dirstate(result.0.as_slice()).unwrap()
+            (new_parents, new_state_map, new_copy_map)
         )
     }
 }
diff --git a/rust/hg-core/src/dirstate/mod.rs b/rust/hg-core/src/dirstate/mod.rs
--- a/rust/hg-core/src/dirstate/mod.rs
+++ b/rust/hg-core/src/dirstate/mod.rs
@@ -1,16 +1,25 @@ 
+// dirstate module
+//
+// Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+use std::collections::HashMap;
+
 pub mod dirs_multiset;
 pub mod parsers;
 
-#[derive(Debug, PartialEq, Copy, Clone)]
-pub struct DirstateParents<'a> {
-    pub p1: &'a [u8],
-    pub p2: &'a [u8],
+#[derive(Debug, PartialEq, Clone)]
+pub struct DirstateParents {
+    pub p1: Vec<u8>,
+    pub p2: Vec<u8>,
 }
 
 /// The C implementation uses all signed types. This will be an issue
 /// either when 4GB+ source files are commonplace or in 2038, whichever
 /// comes first.
-#[derive(Debug, PartialEq)]
+#[derive(Debug, PartialEq, Copy, Clone)]
 pub struct DirstateEntry {
     pub state: i8,
     pub mode: i32,
@@ -18,19 +27,12 @@ 
     pub size: i32,
 }
 
-pub type DirstateVec = Vec<(Vec<u8>, DirstateEntry)>;
-
-#[derive(Debug, PartialEq)]
-pub struct CopyVecEntry<'a> {
-    pub path: &'a [u8],
-    pub copy_path: &'a [u8],
-}
-
-pub type CopyVec<'a> = Vec<CopyVecEntry<'a>>;
+pub type StateMap = HashMap<Vec<u8>, DirstateEntry>;
+pub type CopyMap = HashMap<Vec<u8>, Vec<u8>>;
 
 /// The Python implementation passes either a mapping (dirstate) or a flat
 /// iterable (manifest)
-pub enum DirsIterable {
-    Dirstate(DirstateVec),
-    Manifest(Vec<Vec<u8>>),
+pub enum DirsIterable<'a> {
+    Dirstate(&'a HashMap<Vec<u8>, DirstateEntry>),
+    Manifest(&'a Vec<Vec<u8>>),
 }
diff --git a/rust/hg-core/src/dirstate/dirs_multiset.rs b/rust/hg-core/src/dirstate/dirs_multiset.rs
--- a/rust/hg-core/src/dirstate/dirs_multiset.rs
+++ b/rust/hg-core/src/dirstate/dirs_multiset.rs
@@ -28,10 +28,10 @@ 
 
         match iterable {
             DirsIterable::Dirstate(vec) => {
-                for (ref filename, DirstateEntry { state, .. }) in vec {
+                for (filename, DirstateEntry { state, .. }) in vec {
                     // This `if` is optimized out of the loop
                     if let Some(skip) = skip_state {
-                        if skip != state {
+                        if skip != *state {
                             multiset.add_path(filename);
                         }
                     } else {
@@ -40,7 +40,7 @@ 
                 }
             }
             DirsIterable::Manifest(vec) => {
-                for ref filename in vec {
+                for filename in vec {
                     multiset.add_path(filename);
                 }
             }
@@ -140,10 +140,11 @@ 
 #[cfg(test)]
 mod tests {
     use super::*;
+    use std::collections::HashMap;
 
     #[test]
     fn test_delete_path_path_not_found() {
-        let mut map = DirsMultiset::new(DirsIterable::Manifest(vec![]), None);
+        let mut map = DirsMultiset::new(DirsIterable::Manifest(&vec![]), None);
         let path = b"doesnotexist/";
         assert_eq!(
             Err(DirstateMapError::PathNotFound(path.to_vec())),
@@ -154,7 +155,7 @@ 
     #[test]
     fn test_delete_path_empty_path() {
         let mut map =
-            DirsMultiset::new(DirsIterable::Manifest(vec![vec![]]), None);
+            DirsMultiset::new(DirsIterable::Manifest(&vec![vec![]]), None);
         let path = b"";
         assert_eq!(Ok(()), map.delete_path(path));
         assert_eq!(
@@ -194,7 +195,7 @@ 
 
     #[test]
     fn test_add_path_empty_path() {
-        let mut map = DirsMultiset::new(DirsIterable::Manifest(vec![]), None);
+        let mut map = DirsMultiset::new(DirsIterable::Manifest(&vec![]), None);
         let path = b"";
         map.add_path(path);
 
@@ -203,7 +204,7 @@ 
 
     #[test]
     fn test_add_path_successful() {
-        let mut map = DirsMultiset::new(DirsIterable::Manifest(vec![]), None);
+        let mut map = DirsMultiset::new(DirsIterable::Manifest(&vec![]), None);
 
         map.add_path(b"a/");
         assert_eq!(1, *map.inner.get(&b"a".to_vec()).unwrap());
@@ -250,13 +251,13 @@ 
     fn test_dirsmultiset_new_empty() {
         use DirsIterable::{Dirstate, Manifest};
 
-        let new = DirsMultiset::new(Manifest(vec![]), None);
+        let new = DirsMultiset::new(Manifest(&vec![]), None);
         let expected = DirsMultiset {
             inner: HashMap::new(),
         };
         assert_eq!(expected, new);
 
-        let new = DirsMultiset::new(Dirstate(vec![]), None);
+        let new = DirsMultiset::new(Dirstate(&HashMap::new()), None);
         let expected = DirsMultiset {
             inner: HashMap::new(),
         };
@@ -276,7 +277,7 @@ 
             .map(|(k, v)| (k.as_bytes().to_vec(), *v))
             .collect();
 
-        let new = DirsMultiset::new(Manifest(input_vec), None);
+        let new = DirsMultiset::new(Manifest(&input_vec), None);
         let expected = DirsMultiset {
             inner: expected_inner,
         };
@@ -301,7 +302,7 @@ 
             .map(|(k, v)| (k.as_bytes().to_vec(), *v))
             .collect();
 
-        let new = DirsMultiset::new(Dirstate(input_map), None);
+        let new = DirsMultiset::new(Dirstate(&input_map), None);
         let expected = DirsMultiset {
             inner: expected_inner,
         };
@@ -321,7 +322,7 @@ 
             .map(|(k, v)| (k.as_bytes().to_vec(), *v))
             .collect();
 
-        let new = DirsMultiset::new(Manifest(input_vec), Some('n' as i8));
+        let new = DirsMultiset::new(Manifest(&input_vec), Some('n' as i8));
         let expected = DirsMultiset {
             inner: expected_inner,
         };
@@ -350,11 +351,10 @@ 
             .map(|(k, v)| (k.as_bytes().to_vec(), *v))
             .collect();
 
-        let new = DirsMultiset::new(Dirstate(input_map), Some('n' as i8));
+        let new = DirsMultiset::new(Dirstate(&input_map), Some('n' as i8));
         let expected = DirsMultiset {
             inner: expected_inner,
         };
         assert_eq!(expected, new);
     }
-
 }