Patchwork D8215: rust-status: move to recursive traversal to prepare for parallel traversal

login
register
mail settings
Submitter phabricator
Date March 5, 2020, 8:34 a.m.
Message ID <a551c60f69ac984c32cb43acee792fa6@localhost.localdomain>
Download mbox | patch
Permalink /patch/45495/
State Not Applicable
Headers show

Comments

phabricator - March 5, 2020, 8:34 a.m.
Alphare updated this revision to Diff 20500.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D8215?vs=20465&id=20500

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D8215/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D8215

AFFECTED FILES
  rust/hg-core/src/dirstate/status.rs

CHANGE DETAILS




To: Alphare, #hg-reviewers
Cc: durin42, mercurial-devel

Patch

diff --git a/rust/hg-core/src/dirstate/status.rs b/rust/hg-core/src/dirstate/status.rs
--- a/rust/hg-core/src/dirstate/status.rs
+++ b/rust/hg-core/src/dirstate/status.rs
@@ -26,7 +26,6 @@ 
 };
 use lazy_static::lazy_static;
 use rayon::prelude::*;
-use std::collections::VecDeque;
 use std::{
     borrow::Cow,
     collections::HashSet,
@@ -300,49 +299,53 @@ 
     pub list_ignored: bool,
 }
 
-/// Dispatch a single file found during `traverse`.
-/// If `file` is a folder that needs to be traversed, it will be pushed into
+/// Dispatch a single entry (file, folder, symlink...) found during `traverse`.
+/// If the entry is a folder that needs to be traversed, it will be pushed into
 /// `work`.
-fn traverse_worker<'a>(
-    work: &mut VecDeque<HgPathBuf>,
-    matcher: &impl Matcher,
+fn handle_traversed_entry<'a>(
+    dir_entry: &DirEntry,
+    matcher: &(impl Matcher + Sync),
+    root_dir: impl AsRef<Path>,
     dmap: &DirstateMap,
     filename: impl AsRef<HgPath>,
-    dir_entry: &DirEntry,
-    ignore_fn: &impl for<'r> Fn(&'r HgPath) -> bool,
-    dir_ignore_fn: &impl for<'r> Fn(&'r HgPath) -> bool,
+    old_results: &FastHashMap<Cow<'a, HgPath>, Dispatch>,
+    ignore_fn: &(impl for<'r> Fn(&'r HgPath) -> bool + Sync),
+    dir_ignore_fn: &(impl for<'r> Fn(&'r HgPath) -> bool + Sync),
     options: StatusOptions,
-) -> Option<IoResult<(Cow<'a, HgPath>, Dispatch)>> {
-    let file_type = match dir_entry.file_type() {
-        Ok(x) => x,
-        Err(e) => return Some(Err(e.into())),
-    };
+) -> IoResult<Vec<(Cow<'a, HgPath>, Dispatch)>> {
+    let file_type = dir_entry.file_type()?;
     let filename = filename.as_ref();
     let entry_option = dmap.get(filename);
 
     if file_type.is_dir() {
         // Do we need to traverse it?
         if !ignore_fn(&filename) || options.list_ignored {
-            work.push_front(filename.to_owned());
+            return traverse_dir(
+                matcher,
+                root_dir,
+                dmap,
+                filename.to_owned(),
+                &old_results,
+                ignore_fn,
+                dir_ignore_fn,
+                options,
+            );
         }
         // Nested `if` until `rust-lang/rust#53668` is stable
         if let Some(entry) = entry_option {
             // Used to be a file, is now a folder
             if matcher.matches_everything() || matcher.matches(&filename) {
-                return Some(Ok((
+                return Ok(vec![(
                     Cow::Owned(filename.to_owned()),
                     dispatch_missing(entry.state),
-                )));
+                )]);
             }
         }
     } else if file_type.is_file() || file_type.is_symlink() {
         if let Some(entry) = entry_option {
             if matcher.matches_everything() || matcher.matches(&filename) {
-                let metadata = match dir_entry.metadata() {
-                    Ok(x) => x,
-                    Err(e) => return Some(Err(e.into())),
-                };
-                return Some(Ok((
+                let metadata = dir_entry.metadata()?;
+                return Ok(vec![(
                     Cow::Owned(filename.to_owned()),
                     dispatch_found(
                         &filename,
@@ -351,7 +354,7 @@ 
                         &dmap.copy_map,
                         options,
                     ),
-                )));
+                )]);
             }
         } else if (matcher.matches_everything() || matcher.matches(&filename))
             && !ignore_fn(&filename)
@@ -360,28 +363,28 @@ 
                 && dir_ignore_fn(&filename)
             {
                 if options.list_ignored {
-                    return Some(Ok((
+                    return Ok(vec![(
                         Cow::Owned(filename.to_owned()),
                         Dispatch::Ignored,
-                    )));
+                    )]);
                 }
             } else {
-                return Some(Ok((
+                return Ok(vec![(
                     Cow::Owned(filename.to_owned()),
                     Dispatch::Unknown,
-                )));
+                )]);
             }
         }
     } else if let Some(entry) = entry_option {
         // Used to be a file or a folder, now something else.
         if matcher.matches_everything() || matcher.matches(&filename) {
-            return Some(Ok((
+            return Ok(vec![(
                 Cow::Owned(filename.to_owned()),
                 dispatch_missing(entry.state),
-            )));
+            )]);
         }
     }
-    None
+    return Ok(vec![]);
 }
 
 /// Walk the working directory recursively to look for changes compared to the
@@ -397,79 +400,99 @@ 
     options: StatusOptions,
 ) -> IoResult<FastHashMap<Cow<'a, HgPath>, Dispatch>> {
     let root_dir = root_dir.as_ref();
-    let mut new_results = FastHashMap::default();
 
-    let mut work = VecDeque::new();
-    work.push_front(path.as_ref().to_owned());
-
-    while let Some(ref directory) = work.pop_front() {
-        if directory.as_bytes() == b".hg" {
-            continue;
-        }
-        let visit_entries = match matcher.visit_children_set(directory) {
-            VisitChildrenSet::Empty => continue,
-            VisitChildrenSet::This | VisitChildrenSet::Recursive => None,
-            VisitChildrenSet::Set(set) => Some(set),
-        };
-        let buf = hg_path_to_path_buf(directory)?;
-        let dir_path = root_dir.join(buf);
-
-        let skip_dot_hg = !directory.as_bytes().is_empty();
-        let entries = match list_directory(dir_path, skip_dot_hg) {
-            Err(e) => match e.kind() {
-                ErrorKind::NotFound | ErrorKind::PermissionDenied => {
-                    new_results.insert(
-                        Cow::Owned(directory.to_owned()),
-                        Dispatch::Bad(BadMatch::OsError(
-                            // Unwrapping here is OK because the error always
-                            // is a real os error
-                            e.raw_os_error().unwrap(),
-                        )),
-                    );
-                    continue;
-                }
-                _ => return Err(e),
-            },
-            Ok(entries) => entries,
-        };
-
-        for (filename, dir_entry) in entries {
-            if let Some(ref set) = visit_entries {
-                if !set.contains(filename.deref()) {
-                    continue;
-                }
-            }
-            // TODO normalize
-            let filename = if directory.is_empty() {
-                filename.to_owned()
-            } else {
-                directory.join(&filename)
-            };
-
-            if !old_results.contains_key(filename.deref()) {
-                if let Some((res, dispatch)) = traverse_worker(
-                    &mut work,
-                    matcher,
-                    &dmap,
-                    &filename,
-                    &dir_entry,
-                    &ignore_fn,
-                    &dir_ignore_fn,
-                    options,
-                )
-                .transpose()?
-                {
-                    new_results.insert(res, dispatch);
-                }
-            }
-        }
-    }
+    let mut new_results: FastHashMap<Cow<'a, HgPath>, Dispatch> =
+        traverse_dir(
+            matcher,
+            root_dir,
+            &dmap,
+            path,
+            &old_results,
+            &ignore_fn,
+            &dir_ignore_fn,
+            options,
+        )?
+        .into_par_iter()
+        .collect();
 
     new_results.extend(old_results.into_iter());
 
     Ok(new_results)
 }
 
+/// Decides whether the directory needs to be listed, and if so dispatches its
+/// entries
+fn traverse_dir<'a>(
+    matcher: &(impl Matcher + Sync),
+    root_dir: impl AsRef<Path>,
+    dmap: &DirstateMap,
+    path: impl AsRef<HgPath>,
+    old_results: &FastHashMap<Cow<'a, HgPath>, Dispatch>,
+    ignore_fn: &(impl for<'r> Fn(&'r HgPath) -> bool + Sync),
+    dir_ignore_fn: &(impl for<'r> Fn(&'r HgPath) -> bool + Sync),
+    options: StatusOptions,
+) -> IoResult<Vec<(Cow<'a, HgPath>, Dispatch)>> {
+    let directory = path.as_ref();
+    if directory.as_bytes() == b".hg" {
+        return Ok(vec![]);
+    }
+    let visit_entries = match matcher.visit_children_set(directory) {
+        VisitChildrenSet::Empty => return Ok(vec![]),
+        VisitChildrenSet::This | VisitChildrenSet::Recursive => None,
+        VisitChildrenSet::Set(set) => Some(set),
+    };
+    let buf = hg_path_to_path_buf(directory)?;
+    let dir_path = root_dir.as_ref().join(buf);
+
+    let skip_dot_hg = !directory.as_bytes().is_empty();
+    let entries = match list_directory(dir_path, skip_dot_hg) {
+        Err(e) => match e.kind() {
+            ErrorKind::NotFound | ErrorKind::PermissionDenied => {
+                return Ok(vec![(
+                    Cow::Owned(directory.to_owned()),
+                    Dispatch::Bad(BadMatch::OsError(
+                        // Unwrapping here is OK because the error always
+                        // is a real os error
+                        e.raw_os_error().unwrap(),
+                    )),
+                )]);
+            }
+            _ => return Err(e),
+        },
+        Ok(entries) => entries,
+    };
+
+    let mut new_results = vec![];
+    for (filename, dir_entry) in entries {
+        if let Some(ref set) = visit_entries {
+            if !set.contains(filename.deref()) {
+                continue;
+            }
+        }
+        // TODO normalize
+        let filename = if directory.is_empty() {
+            filename.to_owned()
+        } else {
+            directory.join(&filename)
+        };
+
+        if !old_results.contains_key(filename.deref()) {
+            new_results.extend(handle_traversed_entry(
+                &dir_entry,
+                matcher,
+                root_dir.as_ref(),
+                &dmap,
+                &filename,
+                old_results,
+                ignore_fn,
+                dir_ignore_fn,
+                options,
+            )?);
+        }
+    }
+    Ok(new_results)
+}
+
 /// Stat all entries in the `DirstateMap` and mark them for dispatch.
 fn stat_dmap_entries(
     dmap: &DirstateMap,
@@ -743,28 +766,5 @@ 
         }
     }
 
-    let results = results.into_iter().filter_map(|(filename, dispatch)| {
-        match dispatch {
-            Dispatch::Bad(_) => return Some((filename, dispatch)),
-            _ => {}
-        };
-        // TODO do this in //, not at the end
-        if !dmap.contains_key(filename.deref()) {
-            if (options.list_ignored || matcher.exact_match(&filename))
-                && dir_ignore_fn(&filename)
-            {
-                if options.list_ignored {
-                    return Some((filename.to_owned(), Dispatch::Ignored));
-                }
-            } else {
-                if !ignore_fn(&filename) {
-                    return Some((filename.to_owned(), Dispatch::Unknown));
-                }
-            }
-            return None;
-        }
-        Some((filename, dispatch))
-    });
-
     Ok((build_response(results), warnings))
 }