Patchwork D8518: rust-status: collect traversed directories if required

login
register
mail settings
Submitter phabricator
Date May 12, 2020, 11:51 a.m.
Message ID <differential-rev-PHID-DREV-z53b4eecz4vz5ykhstw5-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/46304/
State Superseded
Headers show

Comments

phabricator - May 12, 2020, 11:51 a.m.
Alphare created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  Some commands (`hg purge` notably) register the `traversedir` callback on their
  matcher to run said callback every time a directory is traversed.
  
  This is the first of three patches, further broadening Rust support for status.
  
  Unfortunately, there is no way around collecting a full `Vec` (or any other
  owned datastructure, like a radix tree) and pushing it back up the Python layer
  since keeping the Python callback in a closure would mean giving up
  multithreading because of the GIL, which is obviously unacceptable.
  
  Performance is still a lot better than the Python+C path.
  
  Running `hg clean/purge` on Netbeans' repo (100k files):
  
         | No-op | 30% unknown
    --------------------------
    Rust | 1.0s  | 1.67s
    C    | 2.0s  | 2.87s

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D8518

AFFECTED FILES
  rust/hg-core/src/dirstate/status.rs

CHANGE DETAILS




To: Alphare, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/rust/hg-core/src/dirstate/status.rs b/rust/hg-core/src/dirstate/status.rs
--- a/rust/hg-core/src/dirstate/status.rs
+++ b/rust/hg-core/src/dirstate/status.rs
@@ -221,6 +221,7 @@ 
     dmap: &'a DirstateMap,
     root_dir: impl AsRef<Path> + Sync + Send + 'a,
     options: StatusOptions,
+    traversed_sender: crossbeam::Sender<HgPathBuf>,
 ) -> impl ParallelIterator<Item = IoResult<(&'a HgPath, Dispatch)>> {
     files
         .unwrap_or(&DEFAULT_WORK)
@@ -255,6 +256,13 @@ 
                         Some(Ok((normalized, Dispatch::Unknown)))
                     } else {
                         if file_type.is_dir() {
+                            if options.collect_traversed_dirs {
+                                // The receiver always outlives the sender,
+                                // so unwrap.
+                                traversed_sender
+                                    .send(normalized.to_owned())
+                                    .unwrap()
+                            }
                             Some(Ok((
                                 normalized,
                                 Dispatch::Directory {
@@ -302,6 +310,9 @@ 
     pub list_clean: bool,
     pub list_unknown: bool,
     pub list_ignored: bool,
+    /// Whether to collect traversed dirs for applying a callback later.
+    /// Used by `hg purge` for example.
+    pub collect_traversed_dirs: bool,
 }
 
 /// Dispatch a single entry (file, folder, symlink...) found during `traverse`.
@@ -319,6 +330,7 @@ 
     options: StatusOptions,
     filename: HgPathBuf,
     dir_entry: DirEntry,
+    traversed_sender: crossbeam::Sender<HgPathBuf>,
 ) -> IoResult<()> {
     let file_type = dir_entry.file_type()?;
     let entry_option = dmap.get(&filename);
@@ -341,6 +353,7 @@ 
             options,
             entry_option,
             filename,
+            traversed_sender,
         );
     } else if file_type.is_file() || file_type.is_symlink() {
         if let Some(entry) = entry_option {
@@ -407,6 +420,7 @@ 
     options: StatusOptions,
     entry_option: Option<&'a DirstateEntry>,
     directory: HgPathBuf,
+    traversed_sender: crossbeam::Sender<HgPathBuf>,
 ) {
     scope.spawn(move |_| {
         // Nested `if` until `rust-lang/rust#53668` is stable
@@ -433,6 +447,7 @@ 
                 ignore_fn,
                 dir_ignore_fn,
                 options,
+                traversed_sender,
             )
             .unwrap_or_else(|e| files_sender.send(Err(e)).unwrap())
         }
@@ -451,9 +466,15 @@ 
     ignore_fn: &IgnoreFnType,
     dir_ignore_fn: &IgnoreFnType,
     options: StatusOptions,
+    traversed_sender: crossbeam::Sender<HgPathBuf>,
 ) -> IoResult<()> {
     let directory = directory.as_ref();
 
+    if options.collect_traversed_dirs {
+        // The receiver always outlives the sender, so unwrap.
+        traversed_sender.send(directory.to_owned()).unwrap()
+    }
+
     let visit_entries = match matcher.visit_children_set(directory) {
         VisitChildrenSet::Empty => return Ok(()),
         VisitChildrenSet::This | VisitChildrenSet::Recursive => None,
@@ -510,6 +531,7 @@ 
                     options,
                     filename,
                     dir_entry,
+                    traversed_sender.clone(),
                 )?;
             }
         }
@@ -533,6 +555,7 @@ 
     dir_ignore_fn: &IgnoreFnType,
     options: StatusOptions,
     results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>,
+    traversed_sender: crossbeam::Sender<HgPathBuf>,
 ) -> IoResult<()> {
     let root_dir = root_dir.as_ref();
 
@@ -550,6 +573,7 @@ 
         &ignore_fn,
         &dir_ignore_fn,
         options,
+        traversed_sender,
     )?;
 
     // Disconnect the channel so the receiver stops waiting
@@ -640,11 +664,14 @@ 
     pub ignored: Vec<Cow<'a, HgPath>>,
     pub unknown: Vec<Cow<'a, HgPath>>,
     pub bad: Vec<(Cow<'a, HgPath>, BadMatch)>,
+    /// Only filled if `collect_traversed_dirs` is `true`
+    pub traversed: Vec<HgPathBuf>,
 }
 
 #[timed]
 fn build_response<'a>(
     results: impl IntoIterator<Item = (Cow<'a, HgPath>, Dispatch)>,
+    traversed: Vec<HgPathBuf>,
 ) -> (Vec<Cow<'a, HgPath>>, DirstateStatus<'a>) {
     let mut lookup = vec![];
     let mut modified = vec![];
@@ -683,6 +710,7 @@ 
             ignored,
             unknown,
             bad,
+            traversed,
         },
     )
 }
@@ -849,8 +877,17 @@ 
 
     let files = matcher.file_set();
 
+    // `crossbeam::Sender` is `Send`, while `mpsc::Sender` is not.
+    let (traversed_sender, traversed_recv) = crossbeam::channel::unbounded();
+
     // Step 1: check the files explicitly mentioned by the user
-    let explicit = walk_explicit(files, &dmap, root_dir, options);
+    let explicit = walk_explicit(
+        files,
+        &dmap,
+        root_dir,
+        options,
+        traversed_sender.clone(),
+    );
 
     // Collect results into a `Vec` because we do very few lookups in most
     // cases.
@@ -888,6 +925,7 @@ 
                             &dir_ignore_fn,
                             options,
                             &mut results,
+                            traversed_sender.clone(),
                         )?;
                     }
                 }
@@ -911,5 +949,9 @@ 
         }
     }
 
-    Ok((build_response(results), warnings))
+    // Close the channel
+    drop(traversed_sender);
+    let traversed_dirs = traversed_recv.into_iter().collect();
+
+    Ok((build_response(results, traversed_dirs), warnings))
 }