Patchwork D10744: dirstate-tree: Remove DirstateMap::iter_node_data_mut

login
register
mail settings
Submitter phabricator
Date May 19, 2021, 4:38 p.m.
Message ID <differential-rev-PHID-DREV-ck2ryngy7pw25sx3ssu7-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/49065/
State Superseded
Headers show

Comments

phabricator - May 19, 2021, 4:38 p.m.
SimonSapin created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  In an upcoming changeset we want DirstateMap to be able to work directly
  with nodes in their "on disk" representation, without always allocating
  corresponding in-memory data structures. Nodes would have two possible
  representations: one immutable "on disk" refering to the bytes buffer
  of the contents of the .hg/dirstate file, and one mutable with HashMap
  like the curren data structure.
  
  These nodes would have copy-on-write semantics: when an immutable node
  would need to be mutated, instead we allocate new mutable node for it and
  its ancestors.
  
  A mutable iterator of the entire tree would still be possible, but it would
  become much more expensive since we’d need to allocate mutable nodes for
  everything.
  
  Instead, remove this iterator. It was only used to clear ambiguous mtimes
  while serializing the `DirstateMap`. Instead clearing and serialization are
  now two separate passes. Clearing first uses an immutable iterator to collect
  the paths of nodes that need to be cleared, then accesses only those nodes
  mutably.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D10744

AFFECTED FILES
  rust/hg-core/src/dirstate/dirstate_map.rs
  rust/hg-core/src/dirstate/parsers.rs
  rust/hg-core/src/dirstate_tree/dirstate_map.rs
  rust/hg-core/src/dirstate_tree/on_disk.rs

CHANGE DETAILS




To: SimonSapin, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/rust/hg-core/src/dirstate_tree/on_disk.rs b/rust/hg-core/src/dirstate_tree/on_disk.rs
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs
@@ -9,8 +9,6 @@ 
 //! Nodes in turn contain slices to variable-size paths, and to their own child
 //! nodes (if any) for nested files and directories.
 
-use crate::dirstate::parsers::clear_ambiguous_mtime;
-use crate::dirstate::parsers::Timestamp;
 use crate::dirstate_tree::dirstate_map::{self, DirstateMap};
 use crate::dirstate_tree::path_with_basename::WithBasename;
 use crate::errors::HgError;
@@ -230,11 +228,7 @@ 
 pub(super) fn write(
     dirstate_map: &mut DirstateMap,
     parents: DirstateParents,
-    now: Timestamp,
 ) -> Result<Vec<u8>, DirstateError> {
-    // TODO: how do we want to handle this in 2038?
-    let now: i32 = now.0.try_into().expect("time overflow");
-
     let header_len = std::mem::size_of::<Header>();
 
     // This ignores the space for paths, and for nodes without an entry.
@@ -248,7 +242,7 @@ 
     // actual offset for the root nodes.
     out.resize(header_len, 0_u8);
 
-    let root = write_nodes(&mut dirstate_map.root, now, &mut out)?;
+    let root = write_nodes(&mut dirstate_map.root, &mut out)?;
 
     let header = Header {
         marker: *V2_FORMAT_MARKER,
@@ -266,7 +260,6 @@ 
 /// Serialize the dirstate to the `v2` format after clearing ambigous `mtime`s.
 fn write_nodes(
     nodes: &mut dirstate_map::ChildNodes,
-    now: i32,
     out: &mut Vec<u8>,
 ) -> Result<ChildNodes, DirstateError> {
     // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
@@ -277,7 +270,7 @@ 
     let mut on_disk_nodes = Vec::with_capacity(nodes.len());
     for (full_path, node) in nodes {
         on_disk_nodes.push(Node {
-            children: write_nodes(&mut node.children, now, out)?,
+            children: write_nodes(&mut node.children, out)?,
             tracked_descendants_count: node.tracked_descendants_count.into(),
             full_path: write_slice::<u8>(
                 full_path.full_path().as_bytes(),
@@ -296,7 +289,6 @@ 
                 }
             },
             entry: if let Some(entry) = &mut node.entry {
-                clear_ambiguous_mtime(entry, now);
                 OptEntry {
                     state: entry.state.into(),
                     mode: entry.mode.into(),
diff --git a/rust/hg-core/src/dirstate_tree/dirstate_map.rs b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs
@@ -6,7 +6,6 @@ 
 
 use super::on_disk;
 use super::path_with_basename::WithBasename;
-use crate::dirstate::parsers::clear_ambiguous_mtime;
 use crate::dirstate::parsers::pack_entry;
 use crate::dirstate::parsers::packed_entry_size;
 use crate::dirstate::parsers::parse_dirstate_entries;
@@ -79,13 +78,6 @@ 
     }
 }
 
-/// `(full_path, entry, copy_source)`
-type NodeDataMut<'tree, 'on_disk> = (
-    &'tree HgPath,
-    &'tree mut Option<DirstateEntry>,
-    &'tree mut Option<Cow<'on_disk, HgPath>>,
-);
-
 impl<'on_disk> DirstateMap<'on_disk> {
     pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
         Self {
@@ -252,7 +244,7 @@ 
 
     fn iter_nodes<'a>(
         &'a self,
-    ) -> impl Iterator<Item = (&'a HgPath, &'a Node)> + 'a {
+    ) -> impl Iterator<Item = (&'a Cow<'on_disk, HgPath>, &'a Node)> + 'a {
         // Depth first tree traversal.
         //
         // If we could afford internal iteration and recursion,
@@ -279,7 +271,7 @@ 
                 // Pseudo-recursion
                 let new_iter = child_node.children.iter();
                 let old_iter = std::mem::replace(&mut iter, new_iter);
-                let key = &**key.full_path();
+                let key = key.full_path();
                 stack.push((key, child_node, old_iter));
             }
             // Found the end of a `children.iter()` iterator.
@@ -296,42 +288,16 @@ 
         })
     }
 
-    /// Mutable iterator for the `(entry, copy source)` of each node.
-    ///
-    /// It would not be safe to yield mutable references to nodes themeselves
-    /// with `-> impl Iterator<Item = &mut Node>` since child nodes are
-    /// reachable from their ancestor nodes, potentially creating multiple
-    /// `&mut` references to a given node.
-    fn iter_node_data_mut<'tree>(
-        &'tree mut self,
-    ) -> impl Iterator<Item = NodeDataMut<'tree, 'on_disk>> + 'tree {
-        // Explict stack for pseudo-recursion, see `iter_nodes` above.
-        let mut stack = Vec::new();
-        let mut iter = self.root.iter_mut();
-        std::iter::from_fn(move || {
-            while let Some((key, child_node)) = iter.next() {
-                // Pseudo-recursion
-                let data = (
-                    &**key.full_path(),
-                    &mut child_node.entry,
-                    &mut child_node.copy_source,
-                );
-                let new_iter = child_node.children.iter_mut();
-                let old_iter = std::mem::replace(&mut iter, new_iter);
-                stack.push((data, old_iter));
+    fn clear_known_ambiguous_mtimes(&mut self, paths: &[impl AsRef<HgPath>]) {
+        for path in paths {
+            if let Some(node) =
+                Self::get_node_mut(&mut self.root, path.as_ref())
+            {
+                if let Some(entry) = node.entry.as_mut() {
+                    entry.clear_mtime();
+                }
             }
-            // Found the end of a `children.values_mut()` iterator.
-            if let Some((data, next_iter)) = stack.pop() {
-                // "Return" from pseudo-recursion by restoring state from the
-                // explicit stack
-                iter = next_iter;
-
-                Some(data)
-            } else {
-                // Reached the bottom of the stack, we’re done
-                None
-            }
-        })
+        }
     }
 }
 
@@ -427,7 +393,7 @@ 
         for filename in filenames {
             if let Some(node) = Self::get_node_mut(&mut self.root, &filename) {
                 if let Some(entry) = node.entry.as_mut() {
-                    clear_ambiguous_mtime(entry, now);
+                    entry.clear_ambiguous_mtime(now);
                 }
             }
         }
@@ -453,7 +419,7 @@ 
                 .filter(|entry| {
                     entry.is_non_normal() || entry.is_from_other_parent()
                 })
-                .map(|_| path)
+                .map(|_| &**path)
         }))
     }
 
@@ -475,7 +441,7 @@ 
             node.entry
                 .as_ref()
                 .filter(|entry| entry.is_non_normal())
-                .map(|_| path)
+                .map(|_| &**path)
         }))
     }
 
@@ -486,7 +452,7 @@ 
             node.entry
                 .as_ref()
                 .filter(|entry| entry.is_from_other_parent())
-                .map(|_| path)
+                .map(|_| &**path)
         }))
     }
 
@@ -522,29 +488,33 @@ 
         parents: DirstateParents,
         now: Timestamp,
     ) -> Result<Vec<u8>, DirstateError> {
+        let now: i32 = now.0.try_into().expect("time overflow");
+        let mut ambiguous_mtimes = Vec::new();
         // Optizimation (to be measured?): pre-compute size to avoid `Vec`
         // reallocations
         let mut size = parents.as_bytes().len();
         for (path, node) in self.iter_nodes() {
-            if node.entry.is_some() {
+            if let Some(entry) = &node.entry {
                 size += packed_entry_size(
                     path,
                     node.copy_source.as_ref().map(|p| &**p),
-                )
+                );
+                if entry.mtime_is_ambigous(now) {
+                    ambiguous_mtimes.push(path.clone())
+                }
             }
         }
+        self.clear_known_ambiguous_mtimes(&ambiguous_mtimes);
 
         let mut packed = Vec::with_capacity(size);
         packed.extend(parents.as_bytes());
 
-        let now: i32 = now.0.try_into().expect("time overflow");
-        for (path, opt_entry, copy_source) in self.iter_node_data_mut() {
-            if let Some(entry) = opt_entry {
-                clear_ambiguous_mtime(entry, now);
+        for (path, node) in self.iter_nodes() {
+            if let Some(entry) = &node.entry {
                 pack_entry(
                     path,
                     entry,
-                    copy_source.as_ref().map(|p| &**p),
+                    node.copy_source.as_ref().map(|p| &**p),
                     &mut packed,
                 );
             }
@@ -558,7 +528,21 @@ 
         parents: DirstateParents,
         now: Timestamp,
     ) -> Result<Vec<u8>, DirstateError> {
-        on_disk::write(self, parents, now)
+        // TODO: how do we want to handle this in 2038?
+        let now: i32 = now.0.try_into().expect("time overflow");
+        let mut paths = Vec::new();
+        for (path, node) in self.iter_nodes() {
+            if let Some(entry) = &node.entry {
+                if entry.mtime_is_ambigous(now) {
+                    paths.push(path.clone())
+                }
+            }
+        }
+        // Borrow of `self` ends here since we collect cloned paths
+
+        self.clear_known_ambiguous_mtimes(&paths);
+
+        on_disk::write(self, parents)
     }
 
     fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> {
@@ -592,7 +576,7 @@ 
         Box::new(self.iter_nodes().filter_map(|(path, node)| {
             node.copy_source
                 .as_ref()
-                .map(|copy_source| (path, &**copy_source))
+                .map(|copy_source| (&**path, &**copy_source))
         }))
     }
 
@@ -649,7 +633,7 @@ 
 
     fn iter(&self) -> StateMapIter<'_> {
         Box::new(self.iter_nodes().filter_map(|(path, node)| {
-            node.entry.as_ref().map(|entry| (path, entry))
+            node.entry.as_ref().map(|entry| (&**path, entry))
         }))
     }
 }
diff --git a/rust/hg-core/src/dirstate/parsers.rs b/rust/hg-core/src/dirstate/parsers.rs
--- a/rust/hg-core/src/dirstate/parsers.rs
+++ b/rust/hg-core/src/dirstate/parsers.rs
@@ -126,25 +126,31 @@ 
 /// Seconds since the Unix epoch
 pub struct Timestamp(pub u64);
 
-pub fn clear_ambiguous_mtime(
-    entry: &mut DirstateEntry,
-    mtime_now: i32,
-) -> bool {
-    let ambiguous =
-        entry.state == EntryState::Normal && entry.mtime == mtime_now;
-    if ambiguous {
-        // The file was last modified "simultaneously" with the current
-        // write to dirstate (i.e. within the same second for file-
-        // systems with a granularity of 1 sec). This commonly happens
-        // for at least a couple of files on 'update'.
-        // The user could change the file without changing its size
-        // within the same second. Invalidate the file's mtime in
-        // dirstate, forcing future 'status' calls to compare the
-        // contents of the file if the size is the same. This prevents
-        // mistakenly treating such files as clean.
-        entry.mtime = -1;
+impl DirstateEntry {
+    pub fn mtime_is_ambigous(&self, now: i32) -> bool {
+        self.state == EntryState::Normal && self.mtime == now
     }
-    ambiguous
+
+    pub fn clear_ambiguous_mtime(&mut self, now: i32) -> bool {
+        let ambiguous = self.mtime_is_ambigous(now);
+        if ambiguous {
+            // The file was last modified "simultaneously" with the current
+            // write to dirstate (i.e. within the same second for file-
+            // systems with a granularity of 1 sec). This commonly happens
+            // for at least a couple of files on 'update'.
+            // The user could change the file without changing its size
+            // within the same second. Invalidate the file's mtime in
+            // dirstate, forcing future 'status' calls to compare the
+            // contents of the file if the size is the same. This prevents
+            // mistakenly treating such files as clean.
+            self.clear_mtime()
+        }
+        ambiguous
+    }
+
+    pub fn clear_mtime(&mut self) {
+        self.mtime = -1;
+    }
 }
 
 pub fn pack_dirstate(
@@ -170,7 +176,7 @@ 
     packed.extend(parents.p2.as_bytes());
 
     for (filename, entry) in state_map.iter_mut() {
-        clear_ambiguous_mtime(entry, now);
+        entry.clear_ambiguous_mtime(now);
         pack_entry(
             filename,
             entry,
diff --git a/rust/hg-core/src/dirstate/dirstate_map.rs b/rust/hg-core/src/dirstate/dirstate_map.rs
--- a/rust/hg-core/src/dirstate/dirstate_map.rs
+++ b/rust/hg-core/src/dirstate/dirstate_map.rs
@@ -5,7 +5,6 @@ 
 // This software may be used and distributed according to the terms of the
 // GNU General Public License version 2 or any later version.
 
-use crate::dirstate::parsers::clear_ambiguous_mtime;
 use crate::dirstate::parsers::Timestamp;
 use crate::{
     dirstate::EntryState,
@@ -166,7 +165,7 @@ 
     ) {
         for filename in filenames {
             if let Some(entry) = self.state_map.get_mut(&filename) {
-                if clear_ambiguous_mtime(entry, now) {
+                if entry.clear_ambiguous_mtime(now) {
                     self.get_non_normal_other_parent_entries()
                         .0
                         .insert(filename.to_owned());