Patchwork D6632: rust-dirstate: rust implementation of dirstatemap

login
register
mail settings
Submitter phabricator
Date July 10, 2019, 2:34 p.m.
Message ID <differential-rev-PHID-DREV-og4sph4pwgqr66smgbh6-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/40869/
State Superseded
Headers show

Comments

phabricator - July 10, 2019, 2:34 p.m.
Alphare created this revision.
Herald added subscribers: mercurial-devel, kevincox, durin42.
Herald added a reviewer: hg-reviewers.

REVISION SUMMARY
  The `dirstatemap` is one of the last building blocks needed to get to a
  `dirstate.walk` Rust implementation.
  
  Disclaimer: This change is part of a big (10) series of patches, all of which
  started as one big changeset that took a long time to write.
  This `dirstatemap` implementation is a compromise in terms of complexity both
  for me and for the reviewers. I chose to submit this patch right now because
  while it is not perfect, it works and is simple enough (IMHO) to be reviewed.
  
  The Python implementation uses a lot of lazy propertycaches, breaks
  encapsulation and is used as an iterator in a lot of places, all of which
  dictated the somewhat unidiomatic patterns in this change.
  Like written in the comments, rewriting this struct to use the typestate
  pattern might be a good idea, but this is a good first step.

REPOSITORY
  rHG Mercurial

REVISION DETAIL
  https://phab.mercurial-scm.org/D6632

AFFECTED FILES
  rust/hg-core/src/dirstate/dirstate_map.rs
  rust/hg-core/src/dirstate/mod.rs
  rust/hg-core/src/lib.rs

CHANGE DETAILS




To: Alphare, #hg-reviewers
Cc: durin42, kevincox, mercurial-devel
phabricator - July 22, 2019, 3:10 p.m.
kevincox added inline comments.
kevincox accepted this revision.

INLINE COMMENTS

> dirstate_map.rs:191
> +                        changed = true;
> +                        *entry = DirstateEntry {
> +                            mtime: MTIME_UNSET,

Can you just do `entry.mtime = MTIME_UNSET`?

> dirstate_map.rs:288
> +    pub fn set_parents(&mut self, parents: DirstateParents) {
> +        self.parents = Some(parents.clone());
> +        self.dirty_parents = true;

This `.clone()` looks unnecessary. If it is necessary you should probably accept a `&DirstateParents`.

> dirstate_map.rs:306
> +
> +        if !self.dirty_parents {
> +            self.set_parents(parents.to_owned());

It isn't clear to me why we don't `set_parents` if `dirty_parents`. This would make me think that `parents` is lazily calculated however `set_parents` sets `parents` and `dirty_parents` which would imply that this is not the case.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D6632/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D6632

To: Alphare, #hg-reviewers, kevincox
Cc: durin42, kevincox, mercurial-devel

Patch

diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs
--- a/rust/hg-core/src/lib.rs
+++ b/rust/hg-core/src/lib.rs
@@ -18,6 +18,7 @@ 
 pub mod testing; // unconditionally built, for use from integration tests
 pub use dirstate::{
     dirs_multiset::DirsMultiset,
+    dirstate_map::DirstateMap,
     parsers::{pack_dirstate, parse_dirstate},
     CopyMap, DirsIterable, DirstateEntry, DirstateParents, EntryState,
     StateMap,
diff --git a/rust/hg-core/src/dirstate/mod.rs b/rust/hg-core/src/dirstate/mod.rs
--- a/rust/hg-core/src/dirstate/mod.rs
+++ b/rust/hg-core/src/dirstate/mod.rs
@@ -10,6 +10,7 @@ 
 use DirstateParseError;
 
 pub mod dirs_multiset;
+pub mod dirstate_map;
 pub mod parsers;
 
 #[derive(Debug, PartialEq, Clone)]
diff --git a/rust/hg-core/src/dirstate/dirstate_map.rs b/rust/hg-core/src/dirstate/dirstate_map.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-core/src/dirstate/dirstate_map.rs
@@ -0,0 +1,431 @@ 
+// dirstate_map.rs
+//
+// Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+use core::borrow::Borrow;
+use dirstate::parsers::PARENT_SIZE;
+use dirstate::EntryState;
+use std::collections::{HashMap, HashSet};
+use std::iter::FromIterator;
+use std::ops::Deref;
+use std::time::Duration;
+use {
+    pack_dirstate, parse_dirstate, CopyMap, DirsIterable, DirsMultiset,
+    DirstateEntry, DirstateError, DirstateMapError, DirstateParents,
+    DirstateParseError, StateMap,
+};
+
+pub type FileFoldMap = HashMap<Vec<u8>, Vec<u8>>;
+
+const NULL_REVISION: &[u8] = b"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
+const MTIME_UNSET: i32 = -1;
+const SIZE_DIRTY: i32 = -2;
+
+#[derive(Default)]
+pub struct DirstateMap {
+    state_map: StateMap,
+    pub copy_map: CopyMap,
+    file_fold_map: Option<FileFoldMap>,
+    pub dirs: Option<DirsMultiset>,
+    pub all_dirs: Option<DirsMultiset>,
+    non_normal_set: HashSet<Vec<u8>>,
+    other_parent_set: HashSet<Vec<u8>>,
+    parents: Option<DirstateParents>,
+    dirty_parents: bool,
+}
+
+/// Should only really be used in python interface code, for clarity
+impl Deref for DirstateMap {
+    type Target = StateMap;
+
+    fn deref(&self) -> &Self::Target {
+        &self.state_map
+    }
+}
+
+impl FromIterator<(Vec<u8>, DirstateEntry)> for DirstateMap {
+    fn from_iter<I: IntoIterator<Item = (Vec<u8>, DirstateEntry)>>(
+        iter: I,
+    ) -> Self {
+        Self {
+            state_map: iter.into_iter().collect(),
+            ..Self::default()
+        }
+    }
+}
+
+impl DirstateMap {
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    pub fn clear(&mut self) {
+        self.state_map.clear();
+        self.copy_map.clear();
+        self.file_fold_map = None;
+        self.non_normal_set.clear();
+        self.other_parent_set.clear();
+        self.set_parents(DirstateParents {
+            p1: NULL_REVISION.to_vec(),
+            p2: NULL_REVISION.to_vec(),
+        })
+    }
+
+    /// Add a tracked file to the dirstate
+    pub fn add_file(
+        &mut self,
+        filename: &[u8],
+        old_state: EntryState,
+        entry: DirstateEntry,
+    ) {
+        if old_state == EntryState::Unknown || old_state == EntryState::Removed
+        {
+            if let Some(ref mut dirs) = self.dirs {
+                dirs.add_path(filename)
+            }
+        }
+        if old_state == EntryState::Unknown {
+            if let Some(ref mut all_dirs) = self.all_dirs {
+                all_dirs.add_path(filename)
+            }
+        }
+        self.state_map.insert(filename.to_owned(), entry.to_owned());
+
+        if entry.state != EntryState::Normal || entry.mtime == MTIME_UNSET {
+            self.non_normal_set.insert(filename.to_owned());
+        }
+
+        if entry.size == SIZE_DIRTY {
+            self.other_parent_set.insert(filename.to_owned());
+        }
+    }
+
+    /// Mark a file as removed in the dirstate.
+    ///
+    /// The `size` parameter is used to store sentinel values that indicate
+    /// the file's previous state.  In the future, we should refactor this
+    /// to be more explicit about what that state is.
+    pub fn remove_file(
+        &mut self,
+        filename: &[u8],
+        old_state: EntryState,
+        size: i32,
+    ) -> Result<(), DirstateMapError> {
+        if old_state != EntryState::Unknown && old_state != EntryState::Removed
+        {
+            if let Some(ref mut dirs) = self.dirs {
+                dirs.delete_path(filename)?;
+            }
+        }
+        if old_state == EntryState::Unknown {
+            if let Some(ref mut all_dirs) = self.all_dirs {
+                all_dirs.add_path(filename);
+            }
+        }
+
+        if let Some(ref mut file_fold_map) = self.file_fold_map {
+            file_fold_map
+                .remove::<Vec<u8>>(filename.to_ascii_uppercase().as_ref());
+        }
+        self.state_map.insert(
+            filename.to_owned(),
+            DirstateEntry {
+                state: EntryState::Removed,
+                mode: 0,
+                size,
+                mtime: 0,
+            },
+        );
+        self.non_normal_set.insert(filename.to_owned());
+        Ok(())
+    }
+
+    /// Remove a file from the dirstate.
+    /// Returns `true` if the file was previously recorded.
+    pub fn drop_file(
+        &mut self,
+        filename: &[u8],
+        old_state: EntryState,
+    ) -> Result<bool, DirstateMapError> {
+        let exists = self
+            .state_map
+            .remove::<Vec<u8>>(filename.to_owned().as_ref())
+            .is_some();
+
+        if exists {
+            if old_state != EntryState::Removed {
+                if let Some(ref mut dirs) = self.dirs {
+                    dirs.delete_path(filename)?;
+                }
+            }
+            if let Some(ref mut all_dirs) = self.all_dirs {
+                all_dirs.delete_path(filename)?;
+            }
+        }
+        if let Some(ref mut file_fold_map) = self.file_fold_map {
+            file_fold_map
+                .remove::<Vec<u8>>(filename.to_ascii_uppercase().as_ref());
+        }
+        self.non_normal_set
+            .remove::<Vec<u8>>(filename.to_owned().as_ref());
+
+        Ok(exists)
+    }
+
+    pub fn clear_ambiguous_times(
+        &mut self,
+        filenames: Vec<Vec<u8>>,
+        now: i32,
+    ) {
+        for filename in filenames {
+            let mut changed = false;
+            self.state_map
+                .entry(filename.to_owned())
+                .and_modify(|entry| {
+                    if entry.state == EntryState::Normal && entry.mtime == now
+                    {
+                        changed = true;
+                        *entry = DirstateEntry {
+                            mtime: MTIME_UNSET,
+                            ..*entry
+                        };
+                    }
+                });
+            if changed {
+                self.non_normal_set.insert(filename.to_owned());
+            }
+        }
+    }
+
+    pub fn non_normal_other_parent_entries(
+        &self,
+    ) -> (HashSet<Vec<u8>>, HashSet<Vec<u8>>) {
+        let mut non_normal = HashSet::new();
+        let mut other_parent = HashSet::new();
+
+        for (
+            filename,
+            DirstateEntry {
+                state, size, mtime, ..
+            },
+        ) in self.state_map.iter()
+        {
+            if *state != EntryState::Normal || *mtime == MTIME_UNSET {
+                non_normal.insert(filename.to_owned());
+            }
+            if *state == EntryState::Normal && *size == SIZE_DIRTY {
+                other_parent.insert(filename.to_owned());
+            }
+        }
+
+        (non_normal, other_parent)
+    }
+    /// Both of these setters and their uses appear to be the simplest way to
+    /// emulate a Python lazy property, but it is ugly and unidiomatic.
+    /// TODO One day, rewriting this struct using the typestate might be a
+    /// good idea.
+    pub fn set_all_dirs(&mut self) {
+        if self.all_dirs.is_none() {
+            self.all_dirs = Some(DirsMultiset::new(
+                DirsIterable::Dirstate(&self.state_map),
+                None,
+            ));
+        }
+    }
+
+    pub fn set_dirs(&mut self) {
+        if self.dirs.is_none() {
+            self.dirs = Some(DirsMultiset::new(
+                DirsIterable::Dirstate(&self.state_map),
+                Some(EntryState::Removed),
+            ));
+        }
+    }
+
+    pub fn has_tracked_dir(&mut self, directory: &[u8]) -> bool {
+        self.set_dirs();
+        self.dirs.as_ref().unwrap().contains_key(directory.as_ref())
+    }
+
+    pub fn has_dir(&mut self, directory: &[u8]) -> bool {
+        self.set_all_dirs();
+        self.all_dirs
+            .as_ref()
+            .unwrap()
+            .contains_key(directory.as_ref())
+    }
+
+    pub fn parents(
+        &mut self,
+        file_contents: &[u8],
+    ) -> Result<DirstateParents, DirstateError> {
+        if let Some(ref parents) = self.parents {
+            return Ok(parents.clone());
+        }
+        let parents;
+        if file_contents.len() == 40 {
+            parents = DirstateParents {
+                p1: file_contents[..PARENT_SIZE].to_owned(),
+                p2: file_contents[PARENT_SIZE..PARENT_SIZE * 2].to_owned(),
+            };
+        } else if file_contents.is_empty() {
+            parents = DirstateParents {
+                p1: NULL_REVISION.to_owned(),
+                p2: NULL_REVISION.to_owned(),
+            };
+        } else {
+            return Err(DirstateError::Parse(DirstateParseError::Damaged));
+        }
+
+        self.parents = Some(parents.to_owned());
+        Ok(parents.clone())
+    }
+
+    pub fn set_parents(&mut self, parents: DirstateParents) {
+        self.parents = Some(parents.clone());
+        self.dirty_parents = true;
+    }
+
+    pub fn read(
+        &mut self,
+        file_contents: &[u8],
+    ) -> Result<Option<DirstateParents>, DirstateError> {
+        if file_contents.is_empty() {
+            return Ok(None);
+        }
+
+        let parents = parse_dirstate(
+            &mut self.state_map,
+            &mut self.copy_map,
+            file_contents,
+        )?;
+
+        if !self.dirty_parents {
+            self.set_parents(parents.to_owned());
+        }
+
+        Ok(Some(parents))
+    }
+
+    pub fn pack(
+        &mut self,
+        parents: DirstateParents,
+        now: Duration,
+    ) -> Result<Vec<u8>, DirstateError> {
+        let packed =
+            pack_dirstate(&mut self.state_map, &self.copy_map, parents, now)?;
+
+        self.dirty_parents = false;
+
+        let result = self.non_normal_other_parent_entries();
+        self.non_normal_set = result.0;
+        self.other_parent_set = result.1;
+        Ok(packed)
+    }
+    pub fn build_file_fold_map(&mut self) -> FileFoldMap {
+        if let Some(ref file_fold_map) = self.file_fold_map {
+            return file_fold_map.to_owned();
+        }
+        let mut new_file_fold_map = FileFoldMap::new();
+        for (filename, DirstateEntry { state, .. }) in self.state_map.borrow()
+        {
+            if *state == EntryState::Removed {
+                new_file_fold_map.insert(
+                    filename.to_ascii_uppercase().to_owned(),
+                    filename.to_owned(),
+                );
+            }
+        }
+        self.file_fold_map = Some(new_file_fold_map);
+        self.file_fold_map.to_owned().unwrap()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_dirs_multiset() {
+        let mut map = DirstateMap::new();
+        assert!(map.dirs.is_none());
+        assert!(map.all_dirs.is_none());
+
+        assert_eq!(false, map.has_dir(b"nope"));
+        assert!(map.all_dirs.is_some());
+        assert!(map.dirs.is_none());
+
+        assert_eq!(false, map.has_tracked_dir(b"nope"));
+        assert!(map.dirs.is_some());
+    }
+
+    #[test]
+    fn test_add_file() {
+        let mut map = DirstateMap::new();
+
+        assert_eq!(0, map.len());
+
+        map.add_file(
+            b"meh",
+            EntryState::Normal,
+            DirstateEntry {
+                state: EntryState::Normal,
+                mode: 1337,
+                mtime: 1337,
+                size: 1337,
+            },
+        );
+
+        assert_eq!(1, map.len());
+        assert_eq!(0, map.non_normal_set.len());
+        assert_eq!(0, map.other_parent_set.len());
+    }
+
+    #[test]
+    fn test_non_normal_other_parent_entries() {
+        let map: DirstateMap = [
+            (b"f1", (EntryState::Removed, 1337, 1337, 1337)),
+            (b"f2", (EntryState::Normal, 1337, 1337, -1)),
+            (b"f3", (EntryState::Normal, 1337, 1337, 1337)),
+            (b"f4", (EntryState::Normal, 1337, -2, 1337)),
+            (b"f5", (EntryState::Added, 1337, 1337, 1337)),
+            (b"f6", (EntryState::Added, 1337, 1337, -1)),
+            (b"f7", (EntryState::Merged, 1337, 1337, -1)),
+            (b"f8", (EntryState::Merged, 1337, 1337, 1337)),
+            (b"f9", (EntryState::Merged, 1337, -2, 1337)),
+            (b"fa", (EntryState::Added, 1337, -2, 1337)),
+            (b"fb", (EntryState::Removed, 1337, -2, 1337)),
+        ]
+        .iter()
+        .map(|(fname, (state, mode, size, mtime))| {
+            (
+                fname.to_vec(),
+                DirstateEntry {
+                    state: *state,
+                    mode: *mode,
+                    size: *size,
+                    mtime: *mtime,
+                },
+            )
+        })
+        .collect();
+
+        let non_normal = [
+            b"f1", b"f2", b"f5", b"f6", b"f7", b"f8", b"f9", b"fa", b"fb",
+        ]
+        .iter()
+        .map(|x| x.to_vec())
+        .collect();
+
+        let mut other_parent = HashSet::new();
+        other_parent.insert(b"f4".to_vec());
+
+        assert_eq!(
+            (non_normal, other_parent),
+            map.non_normal_other_parent_entries()
+        );
+    }
+}