Patchwork D7929: rust-status: add bare `hg status` support in hg-core

login
register
mail settings
Submitter phabricator
Date Feb. 11, 2020, 10:55 a.m.
Message ID <649dbed2d07088a72d8e43ab1efeb2d6@localhost.localdomain>
Download mbox | patch
Permalink /patch/45175/
State Not Applicable
Headers show

Comments

phabricator - Feb. 11, 2020, 10:55 a.m.
Alphare updated this revision to Diff 20157.

REPOSITORY
  rHG Mercurial

CHANGES SINCE LAST UPDATE
  https://phab.mercurial-scm.org/D7929?vs=20048&id=20157

BRANCH
  default

CHANGES SINCE LAST ACTION
  https://phab.mercurial-scm.org/D7929/new/

REVISION DETAIL
  https://phab.mercurial-scm.org/D7929

AFFECTED FILES
  rust/hg-core/src/dirstate/status.rs
  rust/hg-core/src/lib.rs

CHANGE DETAILS




To: Alphare, #hg-reviewers
Cc: marmoute, kevincox, mercurial-devel

Patch

diff --git a/rust/hg-core/src/lib.rs b/rust/hg-core/src/lib.rs
--- a/rust/hg-core/src/lib.rs
+++ b/rust/hg-core/src/lib.rs
@@ -13,7 +13,9 @@ 
     dirs_multiset::{DirsMultiset, DirsMultisetIter},
     dirstate_map::DirstateMap,
     parsers::{pack_dirstate, parse_dirstate, PARENT_SIZE},
-    status::{status, DirstateStatus, StatusOptions},
+    status::{
+        status, BadMatch, BadType, DirstateStatus, StatusError, StatusOptions,
+    },
     CopyMap, CopyMapIter, DirstateEntry, DirstateParents, EntryState,
     StateMap, StateMapIter,
 };
diff --git a/rust/hg-core/src/dirstate/status.rs b/rust/hg-core/src/dirstate/status.rs
--- a/rust/hg-core/src/dirstate/status.rs
+++ b/rust/hg-core/src/dirstate/status.rs
@@ -11,22 +11,31 @@ 
 
 use crate::{
     dirstate::SIZE_FROM_OTHER_PARENT,
-    matchers::{Matcher, VisitChildrenSet},
+    filepatterns::PatternFileWarning,
+    matchers::{get_ignore_function, Matcher, VisitChildrenSet},
     utils::{
-        files::HgMetadata,
+        files::{find_dirs, HgMetadata},
         hg_path::{
             hg_path_to_path_buf, os_string_to_hg_path_buf, HgPath, HgPathBuf,
+            HgPathError,
         },
+        path_auditor::PathAuditor,
     },
     CopyMap, DirstateEntry, DirstateMap, EntryState, FastHashMap,
+    PatternError,
 };
+use lazy_static::lazy_static;
 use rayon::prelude::*;
-use std::borrow::Cow;
-use std::collections::{HashSet, VecDeque};
-use std::fs::{read_dir, DirEntry};
-use std::io::ErrorKind;
-use std::ops::Deref;
-use std::path::Path;
+use std::collections::VecDeque;
+use std::{
+    borrow::Cow,
+    collections::HashSet,
+    fs::{read_dir, DirEntry},
+    io::ErrorKind,
+    ops::Deref,
+    path::Path,
+    sync::mpsc,
+};
 
 /// Wrong type of file from a `BadMatch`
 /// Note: a lot of those don't exist on all platforms.
@@ -50,6 +59,7 @@ 
 /// Marker enum used to dispatch new status entries into the right collections.
 /// Is similar to `crate::EntryState`, but represents the transient state of
 /// entries during the lifetime of a command.
+#[derive(Debug)]
 enum Dispatch {
     Unsure,
     Modified,
@@ -150,7 +160,7 @@ 
             } else if options.list_clean {
                 Dispatch::Clean
             } else {
-                Dispatch::Unknown
+                Dispatch::None
             }
         }
         EntryState::Merged => Dispatch::Modified,
@@ -174,57 +184,95 @@ 
     }
 }
 
+lazy_static! {
+    static ref DEFAULT_WORK: HashSet<&'static HgPath> = {
+        let mut h = HashSet::new();
+        h.insert(HgPath::new(b""));
+        h
+    };
+}
+
 /// Get stat data about the files explicitly specified by match.
 /// TODO subrepos
 fn walk_explicit<'a>(
-    files: &'a HashSet<&HgPath>,
+    files: Option<&'a HashSet<&HgPath>>,
     dmap: &'a DirstateMap,
     root_dir: impl AsRef<Path> + Sync + Send,
+    work: mpsc::Sender<&'a HgPath>,
     options: StatusOptions,
-) -> impl ParallelIterator<Item = IoResult<(&'a HgPath, Dispatch)>> {
-    files.par_iter().filter_map(move |filename| {
-        // TODO normalization
-        let normalized = filename.as_ref();
+) -> impl ParallelIterator<Item = IoResult<(Cow<'a, HgPath>, Dispatch)>> {
+    files
+        .unwrap_or(&DEFAULT_WORK)
+        .par_iter()
+        .map_with(work, move |work, filename| {
+            // TODO normalization
+            let normalized = filename.as_ref();
 
-        let buf = match hg_path_to_path_buf(normalized) {
-            Ok(x) => x,
-            Err(e) => return Some(Err(e.into())),
-        };
-        let target = root_dir.as_ref().join(buf);
-        let st = target.symlink_metadata();
-        match st {
-            Ok(meta) => {
-                let file_type = meta.file_type();
-                if file_type.is_file() || file_type.is_symlink() {
-                    if let Some(entry) = dmap.get(normalized) {
+            let buf = match hg_path_to_path_buf(normalized) {
+                Ok(x) => x,
+                Err(e) => return Some(Err(e.into())),
+            };
+            let target = root_dir.as_ref().join(buf);
+            let st = target.symlink_metadata();
+            let in_dmap = dmap.get(normalized);
+            match st {
+                Ok(meta) => {
+                    let file_type = meta.file_type();
+                    if file_type.is_file() || file_type.is_symlink() {
+                        if let Some(entry) = in_dmap {
+                            return Some(Ok((
+                                Cow::Borrowed(normalized),
+                                dispatch_found(
+                                    &normalized,
+                                    *entry,
+                                    HgMetadata::from_metadata(meta),
+                                    &dmap.copy_map,
+                                    options,
+                                ),
+                            )));
+                        }
                         return Some(Ok((
-                            normalized,
-                            dispatch_found(
-                                &normalized,
-                                *entry,
-                                HgMetadata::from_metadata(meta),
-                                &dmap.copy_map,
-                                options,
-                            ),
+                            Cow::Borrowed(normalized),
+                            Dispatch::Unknown,
+                        )));
+                    } else {
+                        if file_type.is_dir() {
+                            // The channel always outlives the sender, unwrap
+                            work.send(normalized).unwrap()
+                        } else {
+                            return Some(Ok((
+                                Cow::Borrowed(normalized),
+                                Dispatch::Bad(BadMatch::BadType(
+                                    // TODO do more than unknown
+                                    // Support for all `BadType` variant
+                                    // varies greatly between platforms.
+                                    // So far, no tests check the type and this
+                                    // should be good enough for most users.
+                                    BadType::Unknown,
+                                )),
+                            )));
+                        }
+                        if in_dmap.is_some() {
+                            return Some(Ok((
+                                Cow::Borrowed(normalized),
+                                Dispatch::Removed,
+                            )));
+                        }
+                    }
+                }
+                Err(_) => {
+                    if let Some(entry) = in_dmap {
+                        return Some(Ok((
+                            Cow::Borrowed(normalized),
+                            dispatch_missing(entry.state),
                         )));
                     }
-                } else {
-                    if dmap.contains_key(normalized) {
-                        return Some(Ok((normalized, Dispatch::Removed)));
-                    }
                 }
-            }
-            Err(_) => {
-                if let Some(entry) = dmap.get(normalized) {
-                    return Some(Ok((
-                        normalized,
-                        dispatch_missing(entry.state),
-                    )));
-                }
-            }
-        };
-        None
-    })
+            };
+            None
+        })
+        .filter(|s| s.is_some())
+        .map(|s| s.unwrap())
 }
 
 #[derive(Debug, Copy, Clone)]
@@ -400,13 +448,12 @@ 
     Ok(new_results)
 }
 
-/// Stat all entries in the `DirstateMap` and mark them for dispatch into
-/// the relevant collections.
+/// Stat all entries in the `DirstateMap` and mark them for dispatch.
 fn stat_dmap_entries(
     dmap: &DirstateMap,
     root_dir: impl AsRef<Path> + Sync + Send,
     options: StatusOptions,
-) -> impl ParallelIterator<Item = IoResult<(&HgPath, Dispatch)>> {
+) -> impl ParallelIterator<Item = IoResult<(Cow<HgPath>, Dispatch)>> {
     dmap.par_iter().map(move |(filename, entry)| {
         let filename: &HgPath = filename;
         let filename_as_path = hg_path_to_path_buf(filename)?;
@@ -417,10 +464,10 @@ 
                 if !(m.file_type().is_file()
                     || m.file_type().is_symlink()) =>
             {
-                Ok((filename, dispatch_missing(entry.state)))
+                Ok((Cow::Borrowed(filename), dispatch_missing(entry.state)))
             }
             Ok(m) => Ok((
-                filename,
+                Cow::Borrowed(filename),
                 dispatch_found(
                     filename,
                     *entry,
@@ -430,14 +477,14 @@ 
                 ),
             )),
             Err(ref e)
-                if e.kind() == std::io::ErrorKind::NotFound
+                if e.kind() == ErrorKind::NotFound
                     || e.raw_os_error() == Some(20) =>
             {
                 // Rust does not yet have an `ErrorKind` for
                 // `NotADirectory` (errno 20)
                 // It happens if the dirstate contains `foo/bar` and
                 // foo is not a directory
-                Ok((filename, dispatch_missing(entry.state)))
+                Ok((Cow::Borrowed(filename), dispatch_missing(entry.state)))
             }
             Err(e) => Err(e),
         }
@@ -445,19 +492,19 @@ 
 }
 
 pub struct DirstateStatus<'a> {
-    pub modified: Vec<&'a HgPath>,
-    pub added: Vec<&'a HgPath>,
-    pub removed: Vec<&'a HgPath>,
-    pub deleted: Vec<&'a HgPath>,
-    pub clean: Vec<&'a HgPath>,
-    pub ignored: Vec<&'a HgPath>,
-    pub unknown: Vec<&'a HgPath>,
-    pub bad: Vec<(&'a HgPath, BadMatch)>,
+    pub modified: Vec<Cow<'a, HgPath>>,
+    pub added: Vec<Cow<'a, HgPath>>,
+    pub removed: Vec<Cow<'a, HgPath>>,
+    pub deleted: Vec<Cow<'a, HgPath>>,
+    pub clean: Vec<Cow<'a, HgPath>>,
+    pub ignored: Vec<Cow<'a, HgPath>>,
+    pub unknown: Vec<Cow<'a, HgPath>>,
+    pub bad: Vec<(Cow<'a, HgPath>, BadMatch)>,
 }
 
 fn build_response<'a>(
-    results: impl IntoIterator<Item = IoResult<(&'a HgPath, Dispatch)>>,
-) -> IoResult<(Vec<&'a HgPath>, DirstateStatus<'a>)> {
+    results: impl IntoIterator<Item = (Cow<'a, HgPath>, Dispatch)>,
+) -> (Vec<Cow<'a, HgPath>>, DirstateStatus<'a>) {
     let mut lookup = vec![];
     let mut modified = vec![];
     let mut added = vec![];
@@ -468,8 +515,7 @@ 
     let mut unknown = vec![];
     let mut bad = vec![];
 
-    for res in results.into_iter() {
-        let (filename, dispatch) = res?;
+    for (filename, dispatch) in results.into_iter() {
         match dispatch {
             Dispatch::Unknown => unknown.push(filename),
             Dispatch::Unsure => lookup.push(filename),
@@ -484,7 +530,7 @@ 
         }
     }
 
-    Ok((
+    (
         lookup,
         DirstateStatus {
             modified,
@@ -496,25 +542,189 @@ 
             unknown,
             bad,
         },
-    ))
+    )
+}
+
+pub enum StatusError {
+    IO(std::io::Error),
+    Path(HgPathError),
+    Pattern(PatternError),
 }
 
+pub type StatusResult<T> = Result<T, StatusError>;
+
+impl From<PatternError> for StatusError {
+    fn from(e: PatternError) -> Self {
+        StatusError::Pattern(e)
+    }
+}
+impl From<HgPathError> for StatusError {
+    fn from(e: HgPathError) -> Self {
+        StatusError::Path(e)
+    }
+}
+impl From<std::io::Error> for StatusError {
+    fn from(e: std::io::Error) -> Self {
+        StatusError::IO(e)
+    }
+}
+
+impl ToString for StatusError {
+    fn to_string(&self) -> String {
+        match self {
+            StatusError::IO(e) => e.to_string(),
+            StatusError::Path(e) => e.to_string(),
+            StatusError::Pattern(e) => e.to_string(),
+        }
+    }
+}
+
+/// Get the status of files in the working directory.
+///
+/// This is the current entry-point for `hg-core` and is realistically unusable
+/// outside of a Python context because its arguments need to provide a lot of
+/// information that will not be necessary in the future.
 pub fn status<'a: 'c, 'b: 'c, 'c>(
     dmap: &'a DirstateMap,
-    matcher: &'b impl Matcher,
+    matcher: &'b (impl Matcher + Sync),
     root_dir: impl AsRef<Path> + Sync + Send + Copy,
+    ignore_files: &[impl AsRef<Path>],
     options: StatusOptions,
-) -> IoResult<(Vec<&'c HgPath>, DirstateStatus<'c>)> {
+) -> StatusResult<(
+    (Vec<Cow<'c, HgPath>>, DirstateStatus<'c>),
+    Vec<PatternFileWarning>,
+)> {
+    let (ignore_fn, warnings) = get_ignore_function(&ignore_files, root_dir)?;
+    let dir_ignore_fn = |dir: &_| {
+        if ignore_fn(dir) {
+            true
+        } else {
+            for p in find_dirs(dir) {
+                if ignore_fn(p) {
+                    return true;
+                }
+            }
+            false
+        }
+    };
+
     let files = matcher.file_set();
+
     let mut results = vec![];
-    if let Some(files) = files {
-        results.par_extend(walk_explicit(&files, &dmap, root_dir, options));
+    let mut work = vec![];
+
+    // Step 1: check the files explicitly mentioned by the user
+    let (tx, rx) = mpsc::channel();
+    results.par_extend(walk_explicit(files, &dmap, root_dir, tx, options));
+    while let Ok(dir) = rx.recv() {
+        if options.list_ignored || options.list_unknown && !dir_ignore_fn(dir)
+        {
+            work.push(dir)
+        }
+    }
+
+    let mut results = results.into_iter().flatten().collect();
+
+    // Step 2: recursively check the working directory for changes if needed
+    for dir in work {
+        if dir_ignore_fn(dir) {
+            continue;
+        }
+        results = traverse(
+            matcher, root_dir, &dmap, dir, results, &ignore_fn, options,
+        )?;
     }
 
     if !matcher.is_exact() {
-        let stat_results = stat_dmap_entries(&dmap, root_dir, options);
-        results.par_extend(stat_results);
+        // Step 3: Check the remaining files from the dmap.
+        // If a dmap file is not in results yet, it was either
+        // a) not matched b) ignored, c) missing, or d) under a
+        // symlink directory.
+
+        let to_visit: Box<dyn Iterator<Item = (&HgPathBuf, &DirstateEntry)>> =
+            if results.is_empty() && matcher.matches_everything() {
+                Box::new(dmap.iter())
+            } else {
+                Box::new(dmap.iter().filter_map(|(f, e)| {
+                    if !results.contains_key(f.deref()) && matcher.matches(f) {
+                        Some((f, e))
+                    } else {
+                        None
+                    }
+                }))
+            };
+        let mut to_visit: Vec<_> = to_visit.collect();
+        to_visit.sort_by(|a, b| a.0.cmp(&b.0));
+
+        if options.list_unknown {
+            // We walked all dirs under the roots that weren't ignored, and
+            // everything that matched was stat'ed and is already in results.
+            // The rest must thus be ignored or under a symlink.
+            let mut path_auditor = PathAuditor::new(root_dir);
+
+            for (ref filename, entry) in to_visit {
+                // Report ignored items in the dmap as long as they are not
+                // under a symlink directory.
+                if path_auditor.check(filename) {
+                    // TODO normalize for case-insensitive filesystems
+                    let buf = hg_path_to_path_buf(filename)?;
+                    results.insert(
+                        Cow::Borrowed(filename),
+                        match root_dir.as_ref().join(&buf).symlink_metadata() {
+                            // File was just ignored, no links, and exists
+                            Ok(meta) => {
+                                let metadata = HgMetadata::from_metadata(meta);
+                                dispatch_found(
+                                    filename,
+                                    *entry,
+                                    metadata,
+                                    &dmap.copy_map,
+                                    options,
+                                )
+                            }
+                            // File doesn't exist
+                            Err(_) => dispatch_missing(entry.state),
+                        },
+                    );
+                } else {
+                    // It's either missing or under a symlink directory which
+                    // we, in this case, report as missing.
+                    results.insert(
+                        Cow::Borrowed(filename),
+                        dispatch_missing(entry.state),
+                    );
+                }
+            }
+        } else {
+            // We may not have walked the full directory tree above, so stat
+            // and check everything we missed.
+            let stat_results = stat_dmap_entries(&dmap, root_dir, options);
+            results.par_extend(stat_results.flatten());
+        }
     }
 
-    build_response(results)
+    let results = results.into_iter().filter_map(|(filename, dispatch)| {
+        match dispatch {
+            Dispatch::Bad(_) => return Some((filename, dispatch)),
+            _ => {}
+        };
+        // TODO do this in //, not at the end
+        if !dmap.contains_key(filename.deref()) {
+            if (options.list_ignored || matcher.exact_match(&filename))
+                && dir_ignore_fn(&filename)
+            {
+                if options.list_ignored {
+                    return Some((filename.to_owned(), Dispatch::Ignored));
+                }
+            } else {
+                if !ignore_fn(&filename) {
+                    return Some((filename.to_owned(), Dispatch::Unknown));
+                }
+            }
+            return None;
+        }
+        Some((filename, dispatch))
+    });
+
+    Ok((build_response(results), warnings))
 }