Patchwork D6393: rust-dirstate: add "dirs" Rust implementation

login
register
mail settings
Submitter phabricator
Date May 17, 2019, 10:10 a.m.
Message ID <differential-rev-PHID-DREV-ntrpshzpvq7eeh3auaiv-req@phab.mercurial-scm.org>
Download mbox | patch
Permalink /patch/40109/
State New
Headers show

Comments

phabricator - May 17, 2019, 10:10 a.m.
Alphare created this revision.
Herald added subscribers: mercurial-devel, kevincox, durin42.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  Following the work done in https://phab.mercurial-scm.org/rHGd1786c1d34fa927d4048054ebe62c55fa14f9b1e and working towards the goal of a
  complete Rust implementation of the dirstate, this rewrites the `dirs` class.
  
  There is already a C implementation, which relies heavily on CPython hacks and
  protocol violations for performance, so I don't expect this to perform as well
  for now, as this is very straight-forward code.
  The immediate benefits are new high-level documentation and some unit tests.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D6393

AFFECTED FILES
  rust/hg-core/src/dirstate/dirs_multiset.rs
  rust/hg-core/src/dirstate/mod.rs
  rust/hg-core/src/lib.rs

CHANGE DETAILS




To: Alphare, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel
phabricator - June 3, 2019, 9:37 p.m.
martinvonz added a comment.


  Does this need updating after https://phab.mercurial-scm.org/D6403? Based on a cursory look at the patch, it has not been updated yet.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D6393

To: Alphare, #hg-reviewers
Cc: martinvonz, durin42, kevincox, mercurial-devel
phabricator - June 5, 2019, 7:05 p.m.
Alphare added a comment.


  In https://phab.mercurial-scm.org/D6393#94007, @martinvonz wrote:
  
  > Does this need updating after https://phab.mercurial-scm.org/D6403? Based on a cursory look at the patch, it has not been updated yet.
  
  
  Indeed it did. I've updated this changeset and rebased.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D6393

To: Alphare, #hg-reviewers
Cc: martinvonz, durin42, kevincox, mercurial-devel

Patch

diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs
--- a/rust/hg-core/src/lib.rs
+++ b/rust/hg-core/src/lib.rs
@@ -15,8 +15,10 @@ 
 pub mod discovery;
 pub mod testing; // unconditionally built, for use from integration tests
 pub use dirstate::{
+    dirs_multiset::DirsMultiset,
     parsers::{pack_dirstate, parse_dirstate},
-    CopyVec, CopyVecEntry, DirstateEntry, DirstateParents, DirstateVec,
+    CopyVec, CopyVecEntry, DirsIterable, DirstateEntry, DirstateParents,
+    DirstateVec,
 };
 mod filepatterns;
 
@@ -72,6 +74,12 @@ 
     BadSize(usize, usize),
 }
 
+#[derive(Debug, PartialEq)]
+pub enum DirstateMapError {
+    PathNotFound(Vec<u8>),
+    EmptyPath,
+}
+
 impl From<std::io::Error> for DirstatePackError {
     fn from(e: std::io::Error) -> Self {
         DirstatePackError::CorruptedEntry(e.to_string())
diff --git a/rust/hg-core/src/dirstate/mod.rs b/rust/hg-core/src/dirstate/mod.rs
--- a/rust/hg-core/src/dirstate/mod.rs
+++ b/rust/hg-core/src/dirstate/mod.rs
@@ -1,3 +1,4 @@ 
+pub mod dirs_multiset;
 pub mod parsers;
 
 #[derive(Debug, PartialEq, Copy, Clone)]
@@ -26,3 +27,10 @@ 
 }
 
 pub type CopyVec<'a> = Vec<CopyVecEntry<'a>>;
+
+/// The Python implementation passes either a mapping (dirstate) or a flat
+/// iterable (manifest)
+pub enum DirsIterable {
+    Dirstate(DirstateVec),
+    Manifest(Vec<Vec<u8>>),
+}
diff --git a/rust/hg-core/src/dirstate/dirs_multiset.rs b/rust/hg-core/src/dirstate/dirs_multiset.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-core/src/dirstate/dirs_multiset.rs
@@ -0,0 +1,342 @@ 
+// dirs_multiset.rs
+//
+// Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+//! A multiset of directory names.
+//!
+//! Used to counts the references to directories in a manifest or dirstate.
+use std::collections::hash_map::Entry;
+use std::collections::HashMap;
+use std::ops::Deref;
+use {DirsIterable, DirstateEntry, DirstateMapError};
+
+#[derive(PartialEq, Debug)]
+pub struct DirsMultiset {
+    inner: HashMap<Vec<u8>, u32>,
+}
+
+impl Deref for DirsMultiset {
+    type Target = HashMap<Vec<u8>, u32>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.inner
+    }
+}
+
+impl DirsMultiset {
+    /// Initializes the multiset from a dirstate or a manifest.
+    ///
+    /// If `skip_state` is provided, skips dirstate entries with equal state.
+    pub fn new(iterable: DirsIterable, skip_state: Option<i8>) -> Self {
+        let mut multiset = DirsMultiset {
+            inner: HashMap::new(),
+        };
+
+        match iterable {
+            DirsIterable::Dirstate(vec) => {
+                for (filename, DirstateEntry { state, .. }) in vec {
+                    // This `if` is optimized out of the loop
+                    if let Some(skip) = skip_state {
+                        if skip != state {
+                            multiset.add_path(filename);
+                        }
+                    } else {
+                        multiset.add_path(filename);
+                    }
+                }
+            }
+            DirsIterable::Manifest(vec) => {
+                for filename in vec {
+                    multiset.add_path(filename);
+                }
+            }
+        }
+
+        multiset
+    }
+
+    /// Returns (maybe) a slice of path containing the next directory name
+    /// without trailing slash, from right to left.
+    fn find_dir(path: &[u8], mut pos: usize) -> Option<&[u8]> {
+        loop {
+            if let Some(new_pos) = pos.checked_sub(1) {
+                if path[new_pos] == b'/' {
+                    break Some(&path[..new_pos]);
+                }
+                pos = new_pos;
+            } else {
+                break None;
+            }
+        }
+    }
+
+    /// Increases the count of deepest directory contained in the path.
+    ///
+    /// If the directory is not yet in the map, adds its parents.
+    pub fn add_path(&mut self, path: Vec<u8>) {
+        if path.is_empty() {
+            return;
+        }
+        let mut pos = path.len();
+
+        while let Some(subpath) = Self::find_dir(&path, pos) {
+            if let Some(val) = self.inner.get_mut(subpath) {
+                *val += 1;
+                break;
+            }
+            self.inner.insert(subpath.to_owned(), 1);
+
+            pos = subpath.len();
+        }
+    }
+
+    /// Decreases the count of deepest directory contained in the path.
+    ///
+    /// If it is the only reference, decreases all parents until one is
+    /// removed.
+    /// If the directory is not in the map, something horrible has happened.
+    pub fn delete_path(
+        &mut self,
+        path: Vec<u8>,
+    ) -> Result<(), DirstateMapError> {
+        if path.is_empty() {
+            return Err(DirstateMapError::EmptyPath);
+        }
+        let mut pos = path.len();
+
+        while let Some(subpath) = Self::find_dir(&path, pos) {
+            match self.inner.entry(subpath.to_owned()) {
+                Entry::Occupied(mut entry) => {
+                    let val = entry.get().clone();
+                    if val > 1 {
+                        entry.insert(val - 1);
+                        break;
+                    }
+                    entry.remove();
+                }
+                Entry::Vacant(_) => {
+                    return Err(DirstateMapError::PathNotFound(path.to_owned()))
+                }
+            };
+
+            pos = subpath.len();
+        }
+
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_delete_path_path_not_found() {
+        let mut map = DirsMultiset::new(DirsIterable::Manifest(vec![]), None);
+        let path = b"doesnotexist/".to_vec();
+        assert_eq!(
+            Err(DirstateMapError::PathNotFound(path.to_owned())),
+            map.delete_path(path)
+        );
+    }
+
+    #[test]
+    fn test_delete_path_empty_path() {
+        let mut map = DirsMultiset::new(DirsIterable::Manifest(vec![]), None);
+        let path = vec![];
+        assert_eq!(Err(DirstateMapError::EmptyPath), map.delete_path(path));
+    }
+
+    #[test]
+    fn test_delete_path_successful() {
+        let mut map = DirsMultiset {
+            inner: [("a", 3), ("a/b", 2), ("a/c", 1)]
+                .iter()
+                .map(|(k, v)| (k.as_bytes().to_vec(), *v))
+                .collect(),
+        };
+
+        assert_eq!(Ok(()), map.delete_path(b"a/b/".to_vec()));
+        assert_eq!(Ok(()), map.delete_path(b"a/b/".to_vec()));
+        assert_eq!(
+            Err(DirstateMapError::PathNotFound(b"a/b/".to_vec())),
+            map.delete_path(b"a/b/".to_vec())
+        );
+
+        assert_eq!(2, *map.get(&b"a".to_vec()).unwrap());
+        assert_eq!(1, *map.get(&b"a/c".to_vec()).unwrap());
+        assert_eq!(Ok(()), map.delete_path(b"a/".to_vec()));
+
+        assert_eq!(Ok(()), map.delete_path(b"a/c/".to_vec()));
+        assert_eq!(
+            Err(DirstateMapError::PathNotFound(b"a/c/".to_vec())),
+            map.delete_path(b"a/c/".to_vec())
+        );
+    }
+
+    #[test]
+    fn test_add_path_empty_path() {
+        let mut map = DirsMultiset::new(DirsIterable::Manifest(vec![]), None);
+        let path = vec![];
+        map.add_path(path);
+
+        assert_eq!(0, map.len());
+    }
+
+    #[test]
+    fn test_add_path_successful() {
+        let mut map = DirsMultiset::new(DirsIterable::Manifest(vec![]), None);
+
+        map.add_path(b"a/".to_vec());
+        assert_eq!(1, *map.get(&b"a".to_vec()).unwrap());
+
+        // Non directory should be ignored
+        map.add_path(b"a".to_vec());
+        assert_eq!(1, *map.get(&b"a".to_vec()).unwrap());
+
+        // Non directory will still add its base
+        map.add_path(b"a/b".to_vec());
+        assert_eq!(2, *map.get(&b"a".to_vec()).unwrap());
+        assert_eq!(1, map.len());
+
+        // Duplicate path works
+        map.add_path(b"a/".to_vec());
+        assert_eq!(3, *map.get(&b"a".to_vec()).unwrap());
+
+        // Nested dir adds to its base
+        map.add_path(b"a/b/".to_vec());
+        assert_eq!(4, *map.get(&b"a".to_vec()).unwrap());
+        assert_eq!(1, *map.get(&b"a/b".to_vec()).unwrap());
+
+        // but not its base's base, because it already existed
+        map.add_path(b"a/b/c/".to_vec());
+        assert_eq!(4, *map.get(&b"a".to_vec()).unwrap());
+        assert_eq!(2, *map.get(&b"a/b".to_vec()).unwrap());
+
+        map.add_path(b"a/c/".to_vec());
+        assert_eq!(1, *map.get(&b"a/c".to_vec()).unwrap());
+
+        let expected = DirsMultiset {
+            inner: [("a", 5), ("a/b", 2), ("a/b/c", 1), ("a/c", 1)]
+                .iter()
+                .map(|(k, v)| (k.as_bytes().to_vec(), *v))
+                .collect(),
+        };
+        assert_eq!(map, expected);
+    }
+
+    #[test]
+    fn test_dirsmultiset_new_empty() {
+        use DirsIterable::{Dirstate, Manifest};
+
+        let new = DirsMultiset::new(Manifest(vec![]), None);
+        let expected = DirsMultiset {
+            inner: HashMap::new(),
+        };
+        assert_eq!(expected, new);
+
+        let new = DirsMultiset::new(Dirstate(vec![]), None);
+        let expected = DirsMultiset {
+            inner: HashMap::new(),
+        };
+        assert_eq!(expected, new);
+    }
+
+    #[test]
+    fn test_dirsmultiset_new_no_skip() {
+        use DirsIterable::{Dirstate, Manifest};
+
+        let input_vec = ["a/", "b/", "a/c", "a/d/"]
+            .iter()
+            .map(|e| e.as_bytes().to_vec())
+            .collect();
+        let expected_inner = [("a", 3), ("b", 1), ("a/d", 1)]
+            .iter()
+            .map(|(k, v)| (k.as_bytes().to_vec(), *v))
+            .collect();
+
+        let new = DirsMultiset::new(Manifest(input_vec), None);
+        let expected = DirsMultiset {
+            inner: expected_inner,
+        };
+        assert_eq!(expected, new);
+
+        let input_map = ["a/", "b/", "a/c", "a/d/"]
+            .iter()
+            .map(|f| {
+                (
+                    f.as_bytes().to_vec(),
+                    DirstateEntry {
+                        state: 0,
+                        mode: 0,
+                        mtime: 0,
+                        size: 0,
+                    },
+                )
+            })
+            .collect();
+        let expected_inner = [("a", 3), ("b", 1), ("a/d", 1)]
+            .iter()
+            .map(|(k, v)| (k.as_bytes().to_vec(), *v))
+            .collect();
+
+        let new = DirsMultiset::new(Dirstate(input_map), None);
+        let expected = DirsMultiset {
+            inner: expected_inner,
+        };
+        assert_eq!(expected, new);
+    }
+
+    #[test]
+    fn test_dirsmultiset_new_skip() {
+        use DirsIterable::{Dirstate, Manifest};
+
+        let input_vec = ["a/", "b/", "a/c", "a/d/"]
+            .iter()
+            .map(|e| e.as_bytes().to_vec())
+            .collect();
+        let expected_inner = [("a", 3), ("b", 1), ("a/d", 1)]
+            .iter()
+            .map(|(k, v)| (k.as_bytes().to_vec(), *v))
+            .collect();
+
+        let new = DirsMultiset::new(Manifest(input_vec), Some('n' as i8));
+        let expected = DirsMultiset {
+            inner: expected_inner,
+        };
+        // Skip does not affect a manifest
+        assert_eq!(expected, new);
+
+        let input_map =
+            [("a/", 'n'), ("a/b/", 'n'), ("a/c", 'r'), ("a/d/", 'm')]
+                .iter()
+                .map(|(f, state)| {
+                    (
+                        f.as_bytes().to_vec(),
+                        DirstateEntry {
+                            state: *state as i8,
+                            mode: 0,
+                            mtime: 0,
+                            size: 0,
+                        },
+                    )
+                })
+                .collect();
+
+        // "a" incremented with "a/c" and "a/d/"
+        let expected_inner = [("a", 2), ("a/d", 1)]
+            .iter()
+            .map(|(k, v)| (k.as_bytes().to_vec(), *v))
+            .collect();
+
+        let new = DirsMultiset::new(Dirstate(input_map), Some('n' as i8));
+        let expected = DirsMultiset {
+            inner: expected_inner,
+        };
+        assert_eq!(expected, new);
+    }
+
+}