Patchwork D10834: rust: Parse "subinclude"d files along the way, not later

login
register
mail settings
Submitter phabricator
Date June 4, 2021, 9:03 a.m.
Message ID <differential-rev-PHID-DREV-aqy2eil5letfko33m4tn-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/49154/
State Superseded
Headers show

Comments

phabricator - June 4, 2021, 9:03 a.m.
SimonSapin created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  When parsing a `.hgignore` file and encountering an `include:` line,
  the included file is parsed recursively right then in a depth-first fashion.
  
  With `subinclude:` however included files were parsed (recursively) much later.
  This changes it to be expanded during parsing, like `.hgignore`.
  
  The motivation for this is an upcoming changeset that needs to detect changes
  in which files are ignored or not. The plan is to hash all ignore files while
  they are being read, and store that hash in the dirstate (in v2 format).
  In order to allow a potential alternative implementations to read that format,
  the algorithm to compute that hash must be documented. Having a well-defined
  depth-first ordering for the tree of (sub-)included files makes that easier.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D10834

AFFECTED FILES
  rust/hg-core/src/filepatterns.rs
  rust/hg-core/src/matchers.rs
  rust/hg-cpython/src/dirstate/status.rs

CHANGE DETAILS




To: SimonSapin, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/rust/hg-cpython/src/dirstate/status.rs b/rust/hg-cpython/src/dirstate/status.rs
--- a/rust/hg-cpython/src/dirstate/status.rs
+++ b/rust/hg-cpython/src/dirstate/status.rs
@@ -211,12 +211,9 @@ 
                 .collect();
 
             let ignore_patterns = ignore_patterns?;
-            let mut all_warnings = vec![];
 
-            let (matcher, warnings) =
-                IncludeMatcher::new(ignore_patterns, &root_dir)
-                    .map_err(|e| handle_fallback(py, e.into()))?;
-            all_warnings.extend(warnings);
+            let matcher = IncludeMatcher::new(ignore_patterns)
+                .map_err(|e| handle_fallback(py, e.into()))?;
 
             let (status_res, warnings) = dmap
                 .status(
@@ -234,9 +231,7 @@ 
                 )
                 .map_err(|e| handle_fallback(py, e))?;
 
-            all_warnings.extend(warnings);
-
-            build_response(py, status_res, all_warnings)
+            build_response(py, status_res, warnings)
         }
         e => Err(PyErr::new::<ValueError, _>(
             py,
diff --git a/rust/hg-core/src/matchers.rs b/rust/hg-core/src/matchers.rs
--- a/rust/hg-core/src/matchers.rs
+++ b/rust/hg-core/src/matchers.rs
@@ -11,7 +11,7 @@ 
     dirstate::dirs_multiset::DirsChildrenMultiset,
     filepatterns::{
         build_single_regex, filter_subincludes, get_patterns_from_file,
-        PatternFileWarning, PatternResult, SubInclude,
+        PatternFileWarning, PatternResult,
     },
     utils::{
         files::find_dirs,
@@ -341,8 +341,8 @@ 
 
 /// Returns the regex pattern and a function that matches an `HgPath` against
 /// said regex formed by the given ignore patterns.
-fn build_regex_match<'a>(
-    ignore_patterns: &'a [&'a IgnorePattern],
+fn build_regex_match(
+    ignore_patterns: &[IgnorePattern],
 ) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + Sync>)> {
     let mut regexps = vec![];
     let mut exact_set = HashSet::new();
@@ -478,32 +478,25 @@ 
 /// Returns a function that checks whether a given file (in the general sense)
 /// should be matched.
 fn build_match<'a, 'b>(
-    ignore_patterns: &'a [IgnorePattern],
-    root_dir: &Path,
-) -> PatternResult<(
-    Vec<u8>,
-    Box<dyn Fn(&HgPath) -> bool + 'b + Sync>,
-    Vec<PatternFileWarning>,
-)> {
+    ignore_patterns: Vec<IgnorePattern>,
+) -> PatternResult<(Vec<u8>, Box<dyn Fn(&HgPath) -> bool + 'b + Sync>)> {
     let mut match_funcs: Vec<Box<dyn Fn(&HgPath) -> bool + Sync>> = vec![];
     // For debugging and printing
     let mut patterns = vec![];
-    let mut all_warnings = vec![];
 
-    let (subincludes, ignore_patterns) =
-        filter_subincludes(ignore_patterns, root_dir)?;
+    let (subincludes, ignore_patterns) = filter_subincludes(ignore_patterns)?;
 
     if !subincludes.is_empty() {
         // Build prefix-based matcher functions for subincludes
         let mut submatchers = FastHashMap::default();
         let mut prefixes = vec![];
 
-        for SubInclude { prefix, root, path } in subincludes.into_iter() {
-            let (match_fn, warnings) =
-                get_ignore_function(vec![path.to_path_buf()], &root)?;
-            all_warnings.extend(warnings);
-            prefixes.push(prefix.to_owned());
-            submatchers.insert(prefix.to_owned(), match_fn);
+        for sub_include in subincludes {
+            let matcher = IncludeMatcher::new(sub_include.included_patterns)?;
+            let match_fn =
+                Box::new(move |path: &HgPath| matcher.matches(path));
+            prefixes.push(sub_include.prefix.clone());
+            submatchers.insert(sub_include.prefix.clone(), match_fn);
         }
 
         let match_subinclude = move |filename: &HgPath| {
@@ -556,14 +549,13 @@ 
     }
 
     Ok(if match_funcs.len() == 1 {
-        (patterns, match_funcs.remove(0), all_warnings)
+        (patterns, match_funcs.remove(0))
     } else {
         (
             patterns,
             Box::new(move |f: &HgPath| -> bool {
                 match_funcs.iter().any(|match_func| match_func(f))
             }),
-            all_warnings,
         )
     })
 }
@@ -588,8 +580,7 @@ 
         all_patterns.extend(patterns.to_owned());
         all_warnings.extend(warnings);
     }
-    let (matcher, warnings) = IncludeMatcher::new(all_patterns, root_dir)?;
-    all_warnings.extend(warnings);
+    let matcher = IncludeMatcher::new(all_patterns)?;
     Ok((
         Box::new(move |path: &HgPath| matcher.matches(path)),
         all_warnings,
@@ -597,34 +588,26 @@ 
 }
 
 impl<'a> IncludeMatcher<'a> {
-    pub fn new(
-        ignore_patterns: Vec<IgnorePattern>,
-        root_dir: &Path,
-    ) -> PatternResult<(Self, Vec<PatternFileWarning>)> {
-        let (patterns, match_fn, warnings) =
-            build_match(&ignore_patterns, root_dir)?;
+    pub fn new(ignore_patterns: Vec<IgnorePattern>) -> PatternResult<Self> {
         let RootsDirsAndParents {
             roots,
             dirs,
             parents,
         } = roots_dirs_and_parents(&ignore_patterns)?;
-
         let prefix = ignore_patterns.iter().any(|k| match k.syntax {
             PatternSyntax::Path | PatternSyntax::RelPath => true,
             _ => false,
         });
+        let (patterns, match_fn) = build_match(ignore_patterns)?;
 
-        Ok((
-            Self {
-                patterns,
-                match_fn,
-                prefix,
-                roots,
-                dirs,
-                parents,
-            },
-            warnings,
-        ))
+        Ok(Self {
+            patterns,
+            match_fn,
+            prefix,
+            roots,
+            dirs,
+            parents,
+        })
     }
 
     fn get_all_parents_children(&self) -> DirsChildrenMultiset {
@@ -810,14 +793,11 @@ 
     #[test]
     fn test_includematcher() {
         // VisitchildrensetPrefix
-        let (matcher, _) = IncludeMatcher::new(
-            vec![IgnorePattern::new(
-                PatternSyntax::RelPath,
-                b"dir/subdir",
-                Path::new(""),
-            )],
-            "".as_ref(),
-        )
+        let (matcher, _) = IncludeMatcher::new(vec![IgnorePattern::new(
+            PatternSyntax::RelPath,
+            b"dir/subdir",
+            Path::new(""),
+        )])
         .unwrap();
 
         let mut set = HashSet::new();
@@ -848,14 +828,11 @@ 
         );
 
         // VisitchildrensetRootfilesin
-        let (matcher, _) = IncludeMatcher::new(
-            vec![IgnorePattern::new(
-                PatternSyntax::RootFiles,
-                b"dir/subdir",
-                Path::new(""),
-            )],
-            "".as_ref(),
-        )
+        let (matcher, _) = IncludeMatcher::new(vec![IgnorePattern::new(
+            PatternSyntax::RootFiles,
+            b"dir/subdir",
+            Path::new(""),
+        )])
         .unwrap();
 
         let mut set = HashSet::new();
@@ -886,14 +863,11 @@ 
         );
 
         // VisitchildrensetGlob
-        let (matcher, _) = IncludeMatcher::new(
-            vec![IgnorePattern::new(
-                PatternSyntax::Glob,
-                b"dir/z*",
-                Path::new(""),
-            )],
-            "".as_ref(),
-        )
+        let (matcher, _) = IncludeMatcher::new(vec![IgnorePattern::new(
+            PatternSyntax::Glob,
+            b"dir/z*",
+            Path::new(""),
+        )])
         .unwrap();
 
         let mut set = HashSet::new();
diff --git a/rust/hg-core/src/filepatterns.rs b/rust/hg-core/src/filepatterns.rs
--- a/rust/hg-core/src/filepatterns.rs
+++ b/rust/hg-core/src/filepatterns.rs
@@ -41,7 +41,7 @@ 
 /// Appended to the regexp of globs
 const GLOB_SUFFIX: &[u8; 7] = b"(?:/|$)";
 
-#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+#[derive(Debug, Clone, PartialEq, Eq)]
 pub enum PatternSyntax {
     /// A regular expression
     Regexp,
@@ -65,6 +65,14 @@ 
     Include,
     /// A file of patterns to match against files under the same directory
     SubInclude,
+    /// SubInclude with the result of parsing the included file
+    ///
+    /// Note: there is no ExpandedInclude because that expansion can be done
+    /// in place by replacing the Include pattern by the included patterns.
+    /// SubInclude requires more handling.
+    ///
+    /// Note: `Box` is used to minimize size impact on other enum variants
+    ExpandedSubInclude(Box<SubInclude>),
 }
 
 /// Transforms a glob pattern into a regex
@@ -218,7 +226,9 @@ 
         PatternSyntax::Glob | PatternSyntax::RootGlob => {
             [glob_to_re(pattern).as_slice(), GLOB_SUFFIX].concat()
         }
-        PatternSyntax::Include | PatternSyntax::SubInclude => unreachable!(),
+        PatternSyntax::Include
+        | PatternSyntax::SubInclude
+        | PatternSyntax::ExpandedSubInclude(_) => unreachable!(),
     }
 }
 
@@ -441,10 +451,10 @@ 
 pub type PatternResult<T> = Result<T, PatternError>;
 
 /// Wrapper for `read_pattern_file` that also recursively expands `include:`
-/// patterns.
+/// and `subinclude:` patterns.
 ///
-/// `subinclude:` is not treated as a special pattern here: unraveling them
-/// needs to occur in the "ignore" phase.
+/// The former are expanded in place, while `PatternSyntax::ExpandedSubInclude`
+/// is used for the latter to form a tree of patterns.
 pub fn get_patterns_from_file(
     pattern_file: &Path,
     root_dir: &Path,
@@ -453,18 +463,35 @@ 
     let patterns = patterns
         .into_iter()
         .flat_map(|entry| -> PatternResult<_> {
-            let IgnorePattern {
-                syntax, pattern, ..
-            } = &entry;
-            Ok(match syntax {
+            Ok(match &entry.syntax {
                 PatternSyntax::Include => {
                     let inner_include =
-                        root_dir.join(get_path_from_bytes(&pattern));
+                        root_dir.join(get_path_from_bytes(&entry.pattern));
                     let (inner_pats, inner_warnings) =
                         get_patterns_from_file(&inner_include, root_dir)?;
                     warnings.extend(inner_warnings);
                     inner_pats
                 }
+                PatternSyntax::SubInclude => {
+                    let mut sub_include = SubInclude::new(
+                        &root_dir,
+                        &entry.pattern,
+                        &entry.source,
+                    )?;
+                    let (inner_patterns, inner_warnings) =
+                        get_patterns_from_file(
+                            &sub_include.path,
+                            &sub_include.root,
+                        )?;
+                    sub_include.included_patterns = inner_patterns;
+                    warnings.extend(inner_warnings);
+                    vec![IgnorePattern {
+                        syntax: PatternSyntax::ExpandedSubInclude(Box::new(
+                            sub_include,
+                        )),
+                        ..entry
+                    }]
+                }
                 _ => vec![entry],
             })
         })
@@ -475,6 +502,7 @@ 
 }
 
 /// Holds all the information needed to handle a `subinclude:` pattern.
+#[derive(Debug, PartialEq, Eq, Clone)]
 pub struct SubInclude {
     /// Will be used for repository (hg) paths that start with this prefix.
     /// It is relative to the current working directory, so comparing against
@@ -484,6 +512,8 @@ 
     pub path: PathBuf,
     /// Folder in the filesystem where this it applies
     pub root: PathBuf,
+
+    pub included_patterns: Vec<IgnorePattern>,
 }
 
 impl SubInclude {
@@ -513,29 +543,25 @@ 
             })?,
             path: path.to_owned(),
             root: new_root.to_owned(),
+            included_patterns: Vec::new(),
         })
     }
 }
 
 /// Separate and pre-process subincludes from other patterns for the "ignore"
 /// phase.
-pub fn filter_subincludes<'a>(
-    ignore_patterns: &'a [IgnorePattern],
-    root_dir: &Path,
-) -> Result<(Vec<SubInclude>, Vec<&'a IgnorePattern>), HgPathError> {
+pub fn filter_subincludes(
+    ignore_patterns: Vec<IgnorePattern>,
+) -> Result<(Vec<Box<SubInclude>>, Vec<IgnorePattern>), HgPathError> {
     let mut subincludes = vec![];
     let mut others = vec![];
 
-    for ignore_pattern in ignore_patterns.iter() {
-        let IgnorePattern {
-            syntax,
-            pattern,
-            source,
-        } = ignore_pattern;
-        if *syntax == PatternSyntax::SubInclude {
-            subincludes.push(SubInclude::new(root_dir, pattern, &source)?);
+    for pattern in ignore_patterns {
+        if let PatternSyntax::ExpandedSubInclude(sub_include) = pattern.syntax
+        {
+            subincludes.push(sub_include);
         } else {
-            others.push(ignore_pattern)
+            others.push(pattern)
         }
     }
     Ok((subincludes, others))