Patchwork D10909: status: Extend read_dir caching to directories with ignored files

login
register
mail settings
Submitter phabricator
Date June 24, 2021, 8:52 p.m.
Message ID <differential-rev-PHID-DREV-lkdpwlsnqka64gmqqdtp-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/49223/
State Superseded
Headers show

Comments

phabricator - June 24, 2021, 8:52 p.m.
SimonSapin created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  See code comments

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D10909

AFFECTED FILES
  rust/hg-core/src/dirstate_tree/on_disk.rs
  rust/hg-core/src/dirstate_tree/status.rs

CHANGE DETAILS




To: SimonSapin, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/rust/hg-core/src/dirstate_tree/status.rs b/rust/hg-core/src/dirstate_tree/status.rs
--- a/rust/hg-core/src/dirstate_tree/status.rs
+++ b/rust/hg-core/src/dirstate_tree/status.rs
@@ -190,18 +190,21 @@ 
             // This happens for example with `hg status -mard`.
             return true;
         }
-        if let Some(cached_mtime) = cached_directory_mtime {
-            // The dirstate contains a cached mtime for this directory, set by
-            // a previous run of the `status` algorithm which found this
-            // directory eligible for `read_dir` caching.
-            if let Some(meta) = directory_metadata {
-                if let Ok(current_mtime) = meta.modified() {
-                    if current_mtime == cached_mtime.into() {
-                        // The mtime of that directory has not changed since
-                        // then, which means that the
-                        // results of `read_dir` should also
-                        // be unchanged.
-                        return true;
+        if !self.options.list_ignored
+            && self.ignore_patterns_have_changed == Some(false)
+        {
+            if let Some(cached_mtime) = cached_directory_mtime {
+                // The dirstate contains a cached mtime for this directory, set
+                // by a previous run of the `status` algorithm which found this
+                // directory eligible for `read_dir` caching.
+                if let Some(meta) = directory_metadata {
+                    if let Ok(current_mtime) = meta.modified() {
+                        if current_mtime == cached_mtime.into() {
+                            // The mtime of that directory has not changed
+                            // since then, which means that the results of
+                            // `read_dir` should also be unchanged.
+                            return true;
+                        }
                     }
                 }
             }
@@ -209,8 +212,8 @@ 
         false
     }
 
-    /// Returns whether the filesystem directory was found to have any entry
-    /// that does not have a corresponding dirstate tree node.
+    /// Returns whether all child entries of the filesystem directory have a
+    /// corresponding dirstate node or are ignored.
     fn traverse_fs_directory_and_dirstate(
         &self,
         has_ignored_ancestor: bool,
@@ -248,11 +251,10 @@ 
                 })
                 .collect::<Result<_, _>>()?;
 
-            // Conservatively don’t let the caller assume that there aren’t
-            // any, since we don’t know.
-            let directory_has_any_fs_only_entry = true;
+            // We don’t know, so conservatively say this isn’t the case
+            let children_all_have_dirstate_node_or_are_ignored = false;
 
-            return Ok(directory_has_any_fs_only_entry);
+            return Ok(children_all_have_dirstate_node_or_are_ignored);
         }
 
         let mut fs_entries = if let Ok(entries) = self.read_dir(
@@ -295,28 +297,32 @@ 
         .par_bridge()
         .map(|pair| {
             use itertools::EitherOrBoth::*;
-            let is_fs_only = pair.is_right();
+            let has_dirstate_node_or_is_ignored;
             match pair {
-                Both(dirstate_node, fs_entry) => self
-                    .traverse_fs_and_dirstate(
+                Both(dirstate_node, fs_entry) => {
+                    self.traverse_fs_and_dirstate(
                         &fs_entry.full_path,
                         &fs_entry.metadata,
                         dirstate_node,
                         has_ignored_ancestor,
-                    )?,
+                    )?;
+                    has_dirstate_node_or_is_ignored = true
+                }
                 Left(dirstate_node) => {
-                    self.traverse_dirstate_only(dirstate_node)?
+                    self.traverse_dirstate_only(dirstate_node)?;
+                    has_dirstate_node_or_is_ignored = true;
                 }
                 Right(fs_entry) => {
                     let hg_path = directory_hg_path.join(&fs_entry.base_name);
                     let is_ignored =
                         has_ignored_ancestor || (self.ignore_fn)(&hg_path);
                     self.traverse_fs_only(is_ignored, hg_path, fs_entry);
+                    has_dirstate_node_or_is_ignored = is_ignored;
                 }
             }
-            Ok(is_fs_only)
+            Ok(has_dirstate_node_or_is_ignored)
         })
-        .try_reduce(|| false, |a, b| Ok(a || b))
+        .try_reduce(|| true, |a, b| Ok(a && b))
     }
 
     fn traverse_fs_and_dirstate(
@@ -349,7 +355,7 @@ 
             }
             let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
             let is_at_repo_root = false;
-            let directory_has_any_fs_only_entry = self
+            let children_all_have_dirstate_node_or_are_ignored = self
                 .traverse_fs_directory_and_dirstate(
                     is_ignored,
                     dirstate_node.children(self.dmap.on_disk)?,
@@ -360,7 +366,7 @@ 
                     is_at_repo_root,
                 )?;
             self.maybe_save_directory_mtime(
-                directory_has_any_fs_only_entry,
+                children_all_have_dirstate_node_or_are_ignored,
                 fs_metadata,
                 dirstate_node,
             )?
@@ -411,11 +417,11 @@ 
 
     fn maybe_save_directory_mtime(
         &self,
-        directory_has_any_fs_only_entry: bool,
+        children_all_have_dirstate_node_or_are_ignored: bool,
         directory_metadata: &std::fs::Metadata,
         dirstate_node: NodeRef<'tree, 'on_disk>,
     ) -> Result<(), DirstateV2ParseError> {
-        if !directory_has_any_fs_only_entry {
+        if children_all_have_dirstate_node_or_are_ignored {
             // All filesystem directory entries from `read_dir` have a
             // corresponding node in the dirstate, so we can reconstitute the
             // names of those entries without calling `read_dir` again.
diff --git a/rust/hg-core/src/dirstate_tree/on_disk.rs b/rust/hg-core/src/dirstate_tree/on_disk.rs
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs
@@ -98,13 +98,16 @@ 
     ///     and must cause a different modification time (unless the system
     ///     clock jumps back and we get unlucky, which is not impossible but
     ///     but deemed unlikely enough).
-    ///   - The directory did not contain any child entry that did not have a
-    ///     corresponding dirstate node.
+    ///   - All direct children of this directory (as returned by
+    ///     `std::fs::read_dir`) either have a corresponding dirstate node, or
+    ///     are ignored by ignore patterns whose hash is in
+    ///     `Header::ignore_patterns_hash`.
     ///
     ///   This means that if `std::fs::symlink_metadata` later reports the
-    ///   same modification time, we don’t need to call `std::fs::read_dir`
-    ///   again for this directory and can iterate child dirstate nodes
-    ///   instead.
+    ///   same modification time and ignored patterns haven’t changed, a run
+    ///   of status that is not listing ignored   files can skip calling
+    ///   `std::fs::read_dir` again for this directory,   iterate child
+    ///   dirstate nodes instead.
     state: u8,
     data: Entry,
 }