Patchwork D10365: dirstate-tree: Add `WithBasename` wrapper for `HgPath`

login
register
mail settings
Submitter phabricator
Date April 12, 2021, 12:24 p.m.
Message ID <differential-rev-PHID-DREV-ewpjjgginyxopynkx7hg-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/48679/
State Superseded
Headers show

Comments

phabricator - April 12, 2021, 12:24 p.m.
SimonSapin created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  In the tree-shaped dirstate we want to have nodes representing files or
  directories, where directory nodes contain a map associating "base" names
  to child nodes for child files and directories.
  
  Many dirstate operations expect a full path from the repository root, but
  re-concatenating string from nested map keys all the time might be expensive.
  Instead, `WithBasename` stores a full path for these operations but
  behaves as its base name (last path component) for equality and comparison.
  
  Additionally `inclusive_ancestors` provides the successive map keys
  that are needed when inserting a new dirstate node at a given full path.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D10365

AFFECTED FILES
  rust/hg-core/src/dirstate_tree.rs
  rust/hg-core/src/dirstate_tree/path_with_basename.rs

CHANGE DETAILS




To: SimonSapin, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/rust/hg-core/src/dirstate_tree/path_with_basename.rs b/rust/hg-core/src/dirstate_tree/path_with_basename.rs
new file mode 100644
--- /dev/null
+++ b/rust/hg-core/src/dirstate_tree/path_with_basename.rs
@@ -0,0 +1,159 @@ 
+use crate::utils::hg_path::HgPath;
+use std::borrow::Borrow;
+
+/// Wraps `HgPath` or `HgPathBuf` to make it behave "as" its last path
+/// component, a.k.a. its base name (as in Python’s `os.path.basename`), but
+/// also allow recovering the full path.
+///
+/// "Behaving as" means that equality and comparison consider only the base
+/// name, and `std::borrow::Borrow` is implemented to return only the base
+/// name. This allows using the base name as a map key while still being able
+/// to recover the full path, in a single memory allocation.
+#[derive(Debug)]
+pub struct WithBasename<T> {
+    full_path: T,
+
+    /// The position after the last slash separator in `full_path`, or `0`
+    /// if there is no slash.
+    base_name_start: usize,
+}
+
+impl<T> WithBasename<T> {
+    pub fn full_path(&self) -> &T {
+        &self.full_path
+    }
+}
+
+impl<T: AsRef<HgPath>> WithBasename<T> {
+    pub fn new(full_path: T) -> Self {
+        let base_name_start = if let Some(last_slash_position) = full_path
+            .as_ref()
+            .as_bytes()
+            .iter()
+            .rposition(|&byte| byte == b'/')
+        {
+            last_slash_position + 1
+        } else {
+            0
+        };
+        Self {
+            base_name_start,
+            full_path,
+        }
+    }
+
+    pub fn base_name(&self) -> &HgPath {
+        HgPath::new(
+            &self.full_path.as_ref().as_bytes()[self.base_name_start..],
+        )
+    }
+}
+
+impl<T: AsRef<HgPath>> Borrow<HgPath> for WithBasename<T> {
+    fn borrow(&self) -> &HgPath {
+        self.base_name()
+    }
+}
+
+impl<T: AsRef<HgPath> + PartialEq> PartialEq for WithBasename<T> {
+    fn eq(&self, other: &Self) -> bool {
+        self.base_name() == other.base_name()
+    }
+}
+
+impl<T: AsRef<HgPath> + Eq> Eq for WithBasename<T> {}
+
+impl<T: AsRef<HgPath> + PartialOrd> PartialOrd for WithBasename<T> {
+    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+        self.base_name().partial_cmp(other.base_name())
+    }
+}
+
+impl<T: AsRef<HgPath> + Ord> Ord for WithBasename<T> {
+    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+        self.base_name().cmp(other.base_name())
+    }
+}
+
+impl<T: ?Sized + ToOwned> WithBasename<&'_ T> {
+    pub fn to_owned(&self) -> WithBasename<T::Owned> {
+        WithBasename {
+            full_path: self.full_path.to_owned(),
+            base_name_start: self.base_name_start,
+        }
+    }
+}
+
+impl<'a> WithBasename<&'a HgPath> {
+    /// Returns an iterator of `WithBasename<&HgPath>` for the ancestor
+    /// directory paths of the given `path`, as well as `path` itself.
+    ///
+    /// For example, the full paths of inclusive ancestors of "a/b/c" are "a",
+    /// "a/b", and "a/b/c" in that order.
+    pub fn inclusive_ancestors_of(
+        path: &'a HgPath,
+    ) -> impl Iterator<Item = WithBasename<&'a HgPath>> {
+        let mut slash_positions =
+            path.as_bytes().iter().enumerate().filter_map(|(i, &byte)| {
+                if byte == b'/' {
+                    Some(i)
+                } else {
+                    None
+                }
+            });
+        let mut opt_next_component_start = Some(0);
+        std::iter::from_fn(move || {
+            opt_next_component_start.take().map(|next_component_start| {
+                if let Some(slash_pos) = slash_positions.next() {
+                    opt_next_component_start = Some(slash_pos + 1);
+                    Self {
+                        full_path: HgPath::new(&path.as_bytes()[..slash_pos]),
+                        base_name_start: next_component_start,
+                    }
+                } else {
+                    // Not setting `opt_next_component_start` here: there will
+                    // be no iteration after this one because `.take()` set it
+                    // to `None`.
+                    Self {
+                        full_path: path,
+                        base_name_start: next_component_start,
+                    }
+                }
+            })
+        })
+    }
+}
+
+#[test]
+fn test() {
+    let a = WithBasename::new(HgPath::new("a").to_owned());
+    assert_eq!(&**a.full_path(), HgPath::new(b"a"));
+    assert_eq!(a.base_name(), HgPath::new(b"a"));
+
+    let cba = WithBasename::new(HgPath::new("c/b/a").to_owned());
+    assert_eq!(&**cba.full_path(), HgPath::new(b"c/b/a"));
+    assert_eq!(cba.base_name(), HgPath::new(b"a"));
+
+    assert_eq!(a, cba);
+    let borrowed: &HgPath = cba.borrow();
+    assert_eq!(borrowed, HgPath::new("a"));
+}
+
+#[test]
+fn test_inclusive_ancestors() {
+    let mut iter = WithBasename::inclusive_ancestors_of(HgPath::new("a/bb/c"));
+
+    let next = iter.next().unwrap();
+    assert_eq!(*next.full_path(), HgPath::new("a"));
+    assert_eq!(next.base_name(), HgPath::new("a"));
+
+    let next = iter.next().unwrap();
+    assert_eq!(*next.full_path(), HgPath::new("a/bb"));
+    assert_eq!(next.base_name(), HgPath::new("bb"));
+
+    let next = iter.next().unwrap();
+    assert_eq!(*next.full_path(), HgPath::new("a/bb/c"));
+    assert_eq!(next.base_name(), HgPath::new("c"));
+
+    assert!(iter.next().is_none());
+}
diff --git a/rust/hg-core/src/dirstate_tree.rs b/rust/hg-core/src/dirstate_tree.rs
--- a/rust/hg-core/src/dirstate_tree.rs
+++ b/rust/hg-core/src/dirstate_tree.rs
@@ -1,2 +1,3 @@ 
 pub mod dirstate_map;
 pub mod dispatch;
+pub mod path_with_basename;