Patchwork D11694: dirstate-v2: adjust the meaning of directory flags

login
register
mail settings
Submitter phabricator
Date Oct. 19, 2021, 6:19 p.m.
Message ID <differential-rev-PHID-DREV-nrtrbbl4uevzvjqk7fly-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/50019/
State Superseded
Headers show

Comments

phabricator - Oct. 19, 2021, 6:19 p.m.
marmoute created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  Tracking directory "explicitly" give use the opportunity to distinct between
  entry that are untracked because they are part of the directory structure and
  entry that are ignored/unknown files on the files system.
  
  The help is adjusted to the new semantic and the code now comply to it for both
  read and write.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D11694

AFFECTED FILES
  mercurial/cext/parsers.c
  mercurial/cext/util.h
  mercurial/dirstateutils/v2.py
  mercurial/helptext/internals/dirstate-v2.txt
  mercurial/pure/parsers.py
  rust/hg-core/src/dirstate_tree/on_disk.rs

CHANGE DETAILS




To: marmoute, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/rust/hg-core/src/dirstate_tree/on_disk.rs b/rust/hg-core/src/dirstate_tree/on_disk.rs
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs
@@ -106,8 +106,8 @@ 
         const P1_TRACKED = 1 << 1;
         const P2_INFO = 1 << 2;
         const HAS_MODE_AND_SIZE = 1 << 3;
-        const HAS_FILE_MTIME = 1 << 4;
-        const HAS_DIRECTORY_MTIME = 1 << 5;
+        const HAS_MTIME = 1 << 4;
+        const DIRECTORY = 1 << 5;
         const MODE_EXEC_PERM = 1 << 6;
         const MODE_IS_SYMLINK = 1 << 7;
         const EXPECTED_STATE_IS_MODIFIED = 1 << 8;
@@ -328,16 +328,13 @@ 
     pub(super) fn cached_directory_mtime(
         &self,
     ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
-        // For now we do not have code to handle ALL_UNKNOWN_RECORDED, so we
-        // ignore the mtime if the flag is set.
-        if self.flags().contains(Flags::HAS_DIRECTORY_MTIME)
+        // For now we do not have code to handle lacks of ALL_UNKNOWN_RECORDED,
+        // so we ignore the mtime if the flag is unset.
+        if self.flags().contains(Flags::DIRECTORY)
+            && self.flags().contains(Flags::HAS_MTIME)
             && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
         {
-            if self.flags().contains(Flags::HAS_FILE_MTIME) {
-                Err(DirstateV2ParseError)
-            } else {
-                Ok(Some(self.mtime.try_into()?))
-            }
+            Ok(Some(self.mtime.try_into()?))
         } else {
             Ok(None)
         }
@@ -369,7 +366,8 @@ 
         } else {
             None
         };
-        let mtime = if self.flags().contains(Flags::HAS_FILE_MTIME)
+        let mtime = if self.flags().contains(Flags::HAS_MTIME)
+            && !self.flags().contains(Flags::DIRECTORY)
             && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
         {
             Some(self.mtime.truncated_seconds.into())
@@ -449,7 +447,7 @@ 
             0.into()
         };
         let mtime = if let Some(m) = mtime_opt {
-            flags.insert(Flags::HAS_FILE_MTIME);
+            flags.insert(Flags::HAS_MTIME);
             PackedTruncatedTimestamp {
                 truncated_seconds: m.into(),
                 nanoseconds: 0.into(),
@@ -630,13 +628,14 @@ 
                             // We never set ALL_IGNORED_RECORDED since we
                             // don't track that case
                             // currently.
-                            Flags::HAS_DIRECTORY_MTIME
+                            Flags::DIRECTORY
+                                | Flags::HAS_MTIME
                                 | Flags::ALL_UNKNOWN_RECORDED,
                             0.into(),
                             (*mtime).into(),
                         ),
                         dirstate_map::NodeData::None => (
-                            Flags::empty(),
+                            Flags::DIRECTORY,
                             0.into(),
                             PackedTruncatedTimestamp::null(),
                         ),
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -49,8 +49,8 @@ 
 DIRSTATE_V2_P1_TRACKED = 1 << 1
 DIRSTATE_V2_P2_INFO = 1 << 2
 DIRSTATE_V2_HAS_MODE_AND_SIZE = 1 << 3
-DIRSTATE_V2_HAS_FILE_MTIME = 1 << 4
-_DIRSTATE_V2_HAS_DIRCTORY_MTIME = 1 << 5  # Unused when Rust is not available
+DIRSTATE_V2_HAS_MTIME = 1 << 4
+DIRSTATE_V2_DIRECTORY = 1 << 5
 DIRSTATE_V2_MODE_EXEC_PERM = 1 << 6
 DIRSTATE_V2_MODE_IS_SYMLINK = 1 << 7
 DIRSTATE_V2_EXPECTED_STATE_IS_MODIFIED = 1 << 8
@@ -137,7 +137,7 @@ 
     def from_v2_data(cls, flags, size, mtime):
         """Build a new DirstateItem object from V2 data"""
         has_mode_size = bool(flags & DIRSTATE_V2_HAS_MODE_AND_SIZE)
-        has_meaningful_mtime = bool(flags & DIRSTATE_V2_HAS_FILE_MTIME)
+        has_meaningful_mtime = bool(flags & DIRSTATE_V2_HAS_MTIME)
         mode = None
 
         if flags & +DIRSTATE_V2_EXPECTED_STATE_IS_MODIFIED:
@@ -441,7 +441,7 @@ 
             if stat.S_ISLNK(self.mode):
                 flags |= DIRSTATE_V2_MODE_IS_SYMLINK
         if self._mtime is not None:
-            flags |= DIRSTATE_V2_HAS_FILE_MTIME
+            flags |= DIRSTATE_V2_HAS_MTIME
 
         if self._fallback_exec is not None:
             flags |= DIRSTATE_V2_HAS_FALLBACK_EXEC
diff --git a/mercurial/helptext/internals/dirstate-v2.txt b/mercurial/helptext/internals/dirstate-v2.txt
--- a/mercurial/helptext/internals/dirstate-v2.txt
+++ b/mercurial/helptext/internals/dirstate-v2.txt
@@ -379,8 +379,8 @@ 
     P1_TRACKED = 1 << 1
     P2_INFO = 1 << 2
     HAS_MODE_AND_SIZE = 1 << 3
-    HAS_FILE_MTIME = 1 << 4
-    HAS_DIRECTORY_MTIME = 1 << 5
+    HAS_MTIME = 1 << 4
+    DIRECTORY = 1 << 5
     MODE_EXEC_PERM = 1 << 6
     MODE_IS_SYMLINK = 1 << 7
     EXPECTED_STATE_IS_MODIFIED = 1 << 8
@@ -476,37 +476,44 @@ 
     If this is unset the expected size, permission, and file type are unknown.
     The `size` field is unused (set to zero).
 
-`HAS_FILE_MTIME`
-    Must be unset for untracked nodes.
-    If this and `HAS_DIRECTORY_MTIME` are both unset,
-    the `mtime` field is unused (set to zero).
-    If this is set, `mtime` is the expected modification time.
+`HAS_MTIME`
+    The nodes contains a "valid" last modification time in the `mtime` field.
+
 
-`HAS_DIRECTORY_MTIME`
-    Must be unset for file tracked anywhere.
-    If this and `HAS_DIRECTORY_MTIME` are both unset,
-    the `mtime` field is unused (set to zero).
-    If this is set, at some point,
-    this path in the working directory was observed:
-
-    - To be a directory
-    - With the modification time given in `mtime`
-    - That time was already strictly in the past when observed,
-      meaning that later changes cannot happen in the same clock tick
-      and must cause a different modification time
-      (unless the system clock jumps back and we get unlucky,
-      which is not impossible but deemed unlikely enough).
-    - All direct children of this directory
-      (as returned by `std::fs::read_dir`)
-      either have a corresponding dirstate node,
-      or are ignored by ignore patterns whose hash is in tree metadata.
+    It means the `mtime` was already strictly in the past when observed,
+    meaning that later changes cannot happen in the same clock tick
+    and must cause a different modification time
+    (unless the system clock jumps back and we get unlucky,
+    which is not impossible but deemed unlikely enough).
 
     This means that if `std::fs::symlink_metadata` later reports
     the same modification time
     and ignored patterns haven’t changed,
-    a run of status that is not listing ignored files
-    can skip calling `std::fs::read_dir` again for this directory,
+    we can assume the node to be unchangeset on disk.
+
+    The `mtime` field can then be used to skip more expensive lookup when
+    checking the status of "tracked" nodes.
+
+    It can also be set for node where `DIRECTORY` is set.
+    See `DIRECTORY` documentation for details.
+
+`DIRECTORY`
+    When set, this entry  will match a directory that exist or existed on the
+    file system.
+
+    * When `HAS_MTIME` is set a directory has been seen on the file system and
+      `mtime` matches its last modificiation time. However, `HAS_MTIME` not being set
+      does not indicate the lack of directory on the file system.
+
+    * When not tracked anywhere, this node does not represent an ignored or
+      unknown files on disk.
+
+    If `HAS_MTIME` is set
+    and `mtime` match the last modification time of the directory on disk,
+    the directory is unchanged
+    and we can skip calling `std::fs::read_dir` again for this directory,
     and iterate child dirstate nodes instead.
+    (as long as `ALL_UNKNOWN_RECORDED` and `ALL_IGNORED_RECORDED` are taken in account)
 
 `MODE_EXEC_PERM`
     Must be unset if `HAS_MODE_AND_SIZE` is unset.
@@ -524,7 +531,7 @@ 
     Must be unset for untracked nodes.
     For:
     - a file tracked anywhere
-    - that has expected metadata (`HAS_MODE_AND_SIZE` and `HAS_FILE_MTIME`)
+    - that has expected metadata (`HAS_MODE_AND_SIZE` and `HAS_MTIME`)
     - if that metadata matches
       metadata found in the working directory with `stat`
     This bit indicates the status of the file.
@@ -540,7 +547,7 @@ 
 `ALL_UNKNOWN_RECORDED`
     If set, all "unknown" children existing on disk (at the time of the last
     status) have been recorded and the `mtime` associated with
-    `HAS_DIRECTORY_MTIME` can be used for optimization even when "unknown" file
+    `DIRECTORY` can be used for optimization even when "unknown" file
     are listed.
 
     Note that the amount recorded "unknown" children can still be zero if None
@@ -553,7 +560,7 @@ 
 `ALL_IGNORED_RECORDED`
     If set, all "ignored" children existing on disk (at the time of the last
     status) have been recorded and the `mtime` associated with
-    `HAS_DIRECTORY_MTIME` can be used for optimization even when "ignored" file
+    `DIRECTORY` can be used for optimization even when "ignored" file
     are listed.
 
     Note that the amount recorded "ignored" children can still be zero if None
diff --git a/mercurial/dirstateutils/v2.py b/mercurial/dirstateutils/v2.py
--- a/mercurial/dirstateutils/v2.py
+++ b/mercurial/dirstateutils/v2.py
@@ -56,6 +56,9 @@ 
 assert TREE_METADATA_SIZE == TREE_METADATA.size
 assert NODE_SIZE == NODE.size
 
+# match constant in mercuria/pure/parsers.py
+DIRSTATE_V2_DIRECTORY = 1 << 5
+
 
 def parse_dirstate(map, copy_map, data, tree_metadata):
     """parse a full v2-dirstate from a binary data into dictionnaries:
@@ -83,7 +86,7 @@ 
     This is used by parse_dirstate to recursively fill `map` and `copy_map`.
 
     All directory specific information is ignored and do not need any
-    processing (HAS_DIRECTORY_MTIME, ALL_UNKNOWN_RECORDED, ALL_IGNORED_RECORDED)
+    processing (DIRECTORY, ALL_UNKNOWN_RECORDED, ALL_IGNORED_RECORDED)
     """
     for i in range(len):
         node_start = start + NODE_SIZE * i
@@ -151,7 +154,7 @@ 
             mtime_ns = 0
         else:
             # There are no mtime-cached directories in the Python implementation
-            flags = 0
+            flags = DIRSTATE_V2_DIRECTORY
             size = 0
             mtime_s = 0
             mtime_ns = 0
diff --git a/mercurial/cext/util.h b/mercurial/cext/util.h
--- a/mercurial/cext/util.h
+++ b/mercurial/cext/util.h
@@ -35,8 +35,8 @@ 
 static const int dirstate_flag_p1_tracked = 1 << 1;
 static const int dirstate_flag_p2_info = 1 << 2;
 static const int dirstate_flag_has_meaningful_data = 1 << 3;
-static const int dirstate_flag_has_file_mtime = 1 << 4;
-static const int dirstate_flag_has_directory_mtime = 1 << 5;
+static const int dirstate_flag_has_mtime = 1 << 4;
+static const int dirstate_flag_directory = 1 << 5;
 static const int dirstate_flag_mode_exec_perm = 1 << 6;
 static const int dirstate_flag_mode_is_symlink = 1 << 7;
 static const int dirstate_flag_expected_state_is_modified = 1 << 8;
diff --git a/mercurial/cext/parsers.c b/mercurial/cext/parsers.c
--- a/mercurial/cext/parsers.c
+++ b/mercurial/cext/parsers.c
@@ -133,7 +133,7 @@ 
 		t->size = 0;
 	}
 	if (has_meaningful_mtime) {
-		t->flags |= dirstate_flag_has_file_mtime;
+		t->flags |= dirstate_flag_has_mtime;
 		t->mtime = mtime;
 	} else {
 		t->mtime = 0;
@@ -248,7 +248,7 @@ 
 {
 	if (dirstate_item_c_removed(self)) {
 		return 0;
-	} else if (!(self->flags & dirstate_flag_has_file_mtime) ||
+	} else if (!(self->flags & dirstate_flag_has_mtime) ||
 	           !(self->flags & dirstate_flag_p1_tracked) ||
 	           !(self->flags & dirstate_flag_wc_tracked) ||
 	           (self->flags & dirstate_flag_p2_info)) {
@@ -357,7 +357,7 @@ 
 			t->flags = (dirstate_flag_wc_tracked |
 			            dirstate_flag_p1_tracked |
 			            dirstate_flag_has_meaningful_data |
-			            dirstate_flag_has_file_mtime);
+			            dirstate_flag_has_mtime);
 			t->mode = mode;
 			t->size = size;
 			t->mtime = mtime;
@@ -401,7 +401,7 @@ 
 	if (t->flags & dirstate_flag_expected_state_is_modified) {
 		t->flags &= ~(dirstate_flag_expected_state_is_modified |
 		              dirstate_flag_has_meaningful_data |
-		              dirstate_flag_has_file_mtime);
+		              dirstate_flag_has_mtime);
 	}
 	t->mode = 0;
 	if (t->flags & dirstate_flag_has_meaningful_data) {
@@ -423,7 +423,7 @@ 
    to make sure it is correct. */
 static PyObject *dirstate_item_set_possibly_dirty(dirstateItemObject *self)
 {
-	self->flags &= ~dirstate_flag_has_file_mtime;
+	self->flags &= ~dirstate_flag_has_mtime;
 	Py_RETURN_NONE;
 }
 
@@ -437,7 +437,7 @@ 
 	}
 	self->flags = dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
 	              dirstate_flag_has_meaningful_data |
-	              dirstate_flag_has_file_mtime;
+	              dirstate_flag_has_mtime;
 	self->mode = mode;
 	self->size = size;
 	self->mtime = mtime;
@@ -447,7 +447,7 @@ 
 static PyObject *dirstate_item_set_tracked(dirstateItemObject *self)
 {
 	self->flags |= dirstate_flag_wc_tracked;
-	self->flags &= ~dirstate_flag_has_file_mtime;
+	self->flags &= ~dirstate_flag_has_mtime;
 	Py_RETURN_NONE;
 }
 
@@ -465,7 +465,7 @@ 
 	if (self->flags & dirstate_flag_p2_info) {
 		self->flags &= ~(dirstate_flag_p2_info |
 		                 dirstate_flag_has_meaningful_data |
-		                 dirstate_flag_has_file_mtime);
+		                 dirstate_flag_has_mtime);
 		self->mode = 0;
 		self->mtime = 0;
 		self->size = 0;