Patchwork D11681: dirstate-v2: adds a flag to mark a file as modified

login
register
mail settings
Submitter phabricator
Date Oct. 18, 2021, 6:19 a.m.
Message ID <differential-rev-PHID-DREV-6tnjaerrz7g5rsab5zwe-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/50006/
State Superseded
Headers show

Comments

phabricator - Oct. 18, 2021, 6:19 a.m.
marmoute created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  Right now, a files with a file system state that requires a lookup (same size,
  different mtime) will requires a lookup. If the result of that lookup is a
  modified files, it will remains ambiguous, requiring a lookup on the next status
  run too.
  
  To fix this, we introduce a dedicated flag in the new format. Such flag will
  allow to record such file as "known modified" avoiding an extra lookup later.
  
  As None of the associate code currently exist in the status code, we do the
  minimal implementation: if we read a dirstate entry with this flag set, we make
  it as "ambiguous" so that the next status code has to look it up. The same as it
  would have to without this flag existing anyway.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D11681

AFFECTED FILES
  mercurial/cext/parsers.c
  mercurial/cext/util.h
  mercurial/helptext/internals/dirstate-v2.txt
  mercurial/pure/parsers.py
  rust/hg-core/src/dirstate_tree/on_disk.rs

CHANGE DETAILS




To: marmoute, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/rust/hg-core/src/dirstate_tree/on_disk.rs b/rust/hg-core/src/dirstate_tree/on_disk.rs
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs
@@ -110,6 +110,7 @@ 
         const HAS_DIRECTORY_MTIME = 1 << 5;
         const MODE_EXEC_PERM = 1 << 6;
         const MODE_IS_SYMLINK = 1 << 7;
+        const EXPECTED_STATE_IS_MODIFIED = 1 << 8;
     }
 }
 
@@ -351,12 +352,16 @@ 
         let wdir_tracked = self.flags().contains(Flags::WDIR_TRACKED);
         let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
         let p2_info = self.flags().contains(Flags::P2_INFO);
-        let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE) {
+        let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
+            && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
+        {
             Some((self.synthesize_unix_mode(), self.size.into()))
         } else {
             None
         };
-        let mtime = if self.flags().contains(Flags::HAS_FILE_MTIME) {
+        let mtime = if self.flags().contains(Flags::HAS_FILE_MTIME)
+            && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
+        {
             Some(self.mtime.truncated_seconds.into())
         } else {
             None
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -53,6 +53,7 @@ 
 _DIRSTATE_V2_HAS_DIRCTORY_MTIME = 1 << 5  # Unused when Rust is not available
 DIRSTATE_V2_MODE_EXEC_PERM = 1 << 6
 DIRSTATE_V2_MODE_IS_SYMLINK = 1 << 7
+DIRSTATE_V2_EXPECTED_STATE_IS_MODIFIED = 1 << 8
 
 
 @attr.s(slots=True, init=False)
@@ -123,7 +124,15 @@ 
     def from_v2_data(cls, flags, size, mtime):
         """Build a new DirstateItem object from V2 data"""
         has_mode_size = bool(flags & DIRSTATE_V2_HAS_MODE_AND_SIZE)
+        has_meaningful_mtime = bool(flags & DIRSTATE_V2_HAS_FILE_MTIME)
         mode = None
+
+        if flags & +DIRSTATE_V2_EXPECTED_STATE_IS_MODIFIED:
+            # we do not have support for this flag in the code yet,
+            # force a lookup for this file.
+            has_mode_size = False
+            has_meaningful_mtime = False
+
         if has_mode_size:
             assert stat.S_IXUSR == 0o100
             if flags & DIRSTATE_V2_MODE_EXEC_PERM:
@@ -139,7 +148,7 @@ 
             p1_tracked=bool(flags & DIRSTATE_V2_P1_TRACKED),
             p2_info=bool(flags & DIRSTATE_V2_P2_INFO),
             has_meaningful_data=has_mode_size,
-            has_meaningful_mtime=bool(flags & DIRSTATE_V2_HAS_FILE_MTIME),
+            has_meaningful_mtime=has_meaningful_mtime,
             parentfiledata=(mode, size, mtime),
         )
 
diff --git a/mercurial/helptext/internals/dirstate-v2.txt b/mercurial/helptext/internals/dirstate-v2.txt
--- a/mercurial/helptext/internals/dirstate-v2.txt
+++ b/mercurial/helptext/internals/dirstate-v2.txt
@@ -383,6 +383,7 @@ 
     HAS_DIRECTORY_MTIME = 1 << 5
     MODE_EXEC_PERM = 1 << 6
     MODE_IS_SYMLINK = 1 << 7
+    EXPECTED_STATE_IS_MODIFIED = 1 << 8
 
   The meaning of each bit is described below.
 
@@ -461,8 +462,7 @@ 
     Must be unset for untracked nodes.
     For files tracked anywhere, if this is set:
     - The `size` field is the expected file size,
-      in bytes truncated its lower to 31 bits,
-      for the file to be clean.
+      in bytes truncated its lower to 31 bits.
     - The expected execute permission for the file’s owner
       is given by `MODE_EXEC_PERM`
     - The expected file type is given by `MODE_IS_SIMLINK`:
@@ -474,8 +474,7 @@ 
     Must be unset for untracked nodes.
     If this and `HAS_DIRECTORY_MTIME` are both unset,
     the `mtime` field is unused (set to zero).
-    If this is set, `mtime` is the modification time
-    expected for the file to be considered clean.
+    If this is set, `mtime` is the expected modification time.
 
 `HAS_DIRECTORY_MTIME`
     Must be unset for file tracked anywhere.
@@ -514,3 +513,20 @@ 
     If `HAS_MODE_AND_SIZE` is set,
     this indicates whether the file is expected to be a symlink
     as opposed to a normal file.
+
+`EXPECTED_STATE_IS_MODIFIED`
+    Must be unset for untracked nodes.
+    For:
+    - a file tracked anywhere
+    - that has expected metadata (`HAS_MODE_AND_SIZE` and `HAS_FILE_MTIME`)
+    - if that metadata matches
+      metadata found in the working directory with `stat`
+    This bit indicates the status of the file.
+    If set, the status is modified. If unset, it is clean.
+
+    In cases where `hg status` needs to read the contents of a file
+    because metadata is ambiguous, this bit lets it record the result
+    if the result is modified so that a future run of `hg status`
+    does not need to do the same again.
+    It is valid to never set this bit,
+    and consider expected metadata ambiguous if it is set.
diff --git a/mercurial/cext/util.h b/mercurial/cext/util.h
--- a/mercurial/cext/util.h
+++ b/mercurial/cext/util.h
@@ -24,21 +24,22 @@ 
 /* clang-format off */
 typedef struct {
 	PyObject_HEAD
-	unsigned char flags;
+	int flags;
 	int mode;
 	int size;
 	int mtime;
 } dirstateItemObject;
 /* clang-format on */
 
-static const unsigned char dirstate_flag_wc_tracked = 1;
-static const unsigned char dirstate_flag_p1_tracked = 1 << 1;
-static const unsigned char dirstate_flag_p2_info = 1 << 2;
-static const unsigned char dirstate_flag_has_meaningful_data = 1 << 3;
-static const unsigned char dirstate_flag_has_file_mtime = 1 << 4;
-static const unsigned char dirstate_flag_has_directory_mtime = 1 << 5;
-static const unsigned char dirstate_flag_mode_exec_perm = 1 << 6;
-static const unsigned char dirstate_flag_mode_is_symlink = 1 << 7;
+static const int dirstate_flag_wc_tracked = 1;
+static const int dirstate_flag_p1_tracked = 1 << 1;
+static const int dirstate_flag_p2_info = 1 << 2;
+static const int dirstate_flag_has_meaningful_data = 1 << 3;
+static const int dirstate_flag_has_file_mtime = 1 << 4;
+static const int dirstate_flag_has_directory_mtime = 1 << 5;
+static const int dirstate_flag_mode_exec_perm = 1 << 6;
+static const int dirstate_flag_mode_is_symlink = 1 << 7;
+static const int dirstate_flag_expected_state_is_modified = 1 << 8;
 
 extern PyTypeObject dirstateItemType;
 #define dirstate_tuple_check(op) (Py_TYPE(op) == &dirstateItemType)
diff --git a/mercurial/cext/parsers.c b/mercurial/cext/parsers.c
--- a/mercurial/cext/parsers.c
+++ b/mercurial/cext/parsers.c
@@ -139,18 +139,16 @@ 
 
 static inline bool dirstate_item_c_any_tracked(dirstateItemObject *self)
 {
-	const unsigned char mask = dirstate_flag_wc_tracked |
-	                           dirstate_flag_p1_tracked |
-	                           dirstate_flag_p2_info;
+	const int mask = dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
+	                 dirstate_flag_p2_info;
 	return (self->flags & mask);
 }
 
 static inline bool dirstate_item_c_added(dirstateItemObject *self)
 {
-	const unsigned char mask =
-	    (dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
-	     dirstate_flag_p2_info);
-	const unsigned char target = dirstate_flag_wc_tracked;
+	const int mask = (dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
+	                  dirstate_flag_p2_info);
+	const int target = dirstate_flag_wc_tracked;
 	return (self->flags & mask) == target;
 }
 
@@ -237,7 +235,7 @@ 
 
 static PyObject *dirstate_item_v2_data(dirstateItemObject *self)
 {
-	unsigned char flags = self->flags;
+	int flags = self->flags;
 	int mode = dirstate_item_c_v1_mode(self);
 	if ((mode & S_IXUSR) != 0) {
 		flags |= dirstate_flag_mode_exec_perm;
@@ -249,7 +247,7 @@ 
 	} else {
 		flags &= ~dirstate_flag_mode_is_symlink;
 	}
-	return Py_BuildValue("Bii", flags, self->size, self->mtime);
+	return Py_BuildValue("iii", flags, self->size, self->mtime);
 };
 
 static PyObject *dirstate_item_v1_state(dirstateItemObject *self)
@@ -372,9 +370,14 @@ 
 	if (!t) {
 		return NULL;
 	}
-	if (!PyArg_ParseTuple(args, "bii", &t->flags, &t->size, &t->mtime)) {
+	if (!PyArg_ParseTuple(args, "iii", &t->flags, &t->size, &t->mtime)) {
 		return NULL;
 	}
+	if (t->flags & dirstate_flag_expected_state_is_modified) {
+		t->flags &= ~(dirstate_flag_expected_state_is_modified |
+		              dirstate_flag_has_meaningful_data |
+		              dirstate_flag_has_file_mtime);
+	}
 	t->mode = 0;
 	if (t->flags & dirstate_flag_has_meaningful_data) {
 		if (t->flags & dirstate_flag_mode_exec_perm) {