Patchwork D11635: dirstate-v2: Replace the 32-bit `mode` field with two bits

login
register
mail settings
Submitter phabricator
Date Oct. 12, 2021, 5:02 p.m.
Message ID <differential-rev-PHID-DREV-r3hsjfetm7sfg7iyjbq5-req@mercurial-scm.org>
Download mbox | patch
Permalink /patch/49958/
State Superseded
Headers show

Comments

phabricator - Oct. 12, 2021, 5:02 p.m.
SimonSapin created this revision.
Herald added a reviewer: hg-reviewers.
Herald added a subscriber: mercurial-patches.

REVISION SUMMARY
  Previously we stored the entire value from `stat_result.st_mode`,
  like dirstate-v1 does. However only the executable permission
  and type of file (only symbolic links and normal files are supported)
  are relevant to Mecurial.
  
  So replace this field with two bits in the existing bitfield byte.
  For now the unused space is left as padding, as it will be used
  for something else soon.

REPOSITORY
  rHG Mercurial

BRANCH
  default

REVISION DETAIL
  https://phab.mercurial-scm.org/D11635

AFFECTED FILES
  mercurial/helptext/internals/dirstate-v2.txt
  rust/Cargo.lock
  rust/hg-core/Cargo.toml
  rust/hg-core/src/dirstate_tree/on_disk.rs
  rust/hg-cpython/Cargo.toml

CHANGE DETAILS




To: SimonSapin, #hg-reviewers
Cc: mercurial-patches, mercurial-devel

Patch

diff --git a/rust/hg-cpython/Cargo.toml b/rust/hg-cpython/Cargo.toml
--- a/rust/hg-cpython/Cargo.toml
+++ b/rust/hg-cpython/Cargo.toml
@@ -23,7 +23,7 @@ 
 [dependencies]
 crossbeam-channel = "0.4"
 hg-core = { path = "../hg-core"}
-libc = '*'
+libc = "0.2"
 log = "0.4.8"
 env_logger = "0.7.1"
 stable_deref_trait = "1.2.0"
diff --git a/rust/hg-core/src/dirstate_tree/on_disk.rs b/rust/hg-core/src/dirstate_tree/on_disk.rs
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs
@@ -107,13 +107,15 @@ 
         const P2_INFO = 1 << 2;
         const HAS_MODE_AND_SIZE = 1 << 3;
         const HAS_MTIME = 1 << 4;
+        const MODE_EXEC_PERM = 1 << 5;
+        const MODE_IS_SYMLINK = 1 << 7;
     }
 }
 
 #[derive(BytesCast, Copy, Clone, Debug)]
 #[repr(C)]
 struct Entry {
-    mode: U32Be,
+    _padding: U32Be,
     size: U32Be,
     mtime: U32Be,
 }
@@ -332,13 +334,27 @@ 
         )
     }
 
+    fn synthesize_unix_mode(&self) -> u32 {
+        let file_type = if self.flags.contains(Flags::MODE_IS_SYMLINK) {
+            libc::S_IFLNK
+        } else {
+            libc::S_IFREG
+        };
+        let permisions = if self.flags.contains(Flags::MODE_EXEC_PERM) {
+            0o755
+        } else {
+            0o644
+        };
+        file_type | permisions
+    }
+
     fn assume_entry(&self) -> DirstateEntry {
         // TODO: convert through raw bits instead?
         let wdir_tracked = self.flags.contains(Flags::WDIR_TRACKED);
         let p1_tracked = self.flags.contains(Flags::P1_TRACKED);
         let p2_info = self.flags.contains(Flags::P2_INFO);
         let mode_size = if self.flags.contains(Flags::HAS_MODE_AND_SIZE) {
-            Some((self.data.mode.into(), self.data.size.into()))
+            Some((self.synthesize_unix_mode(), self.data.size.into()))
         } else {
             None
         };
@@ -400,13 +416,15 @@ 
         flags.set(Flags::WDIR_TRACKED, wdir_tracked);
         flags.set(Flags::P1_TRACKED, p1_tracked);
         flags.set(Flags::P2_INFO, p2_info);
-        let (mode, size, mtime);
+        let (size, mtime);
         if let Some((m, s)) = mode_size_opt {
-            mode = m;
+            let exec_perm = m & libc::S_IXUSR != 0;
+            let is_symlink = m & libc::S_IFMT == libc::S_IFLNK;
+            flags.set(Flags::MODE_EXEC_PERM, exec_perm);
+            flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
             size = s;
             flags.insert(Flags::HAS_MODE_AND_SIZE)
         } else {
-            mode = 0;
             size = 0;
         }
         if let Some(m) = mtime_opt {
@@ -416,7 +434,7 @@ 
             mtime = 0;
         }
         let raw_entry = Entry {
-            mode: mode.into(),
+            _padding: 0.into(),
             size: size.into(),
             mtime: mtime.into(),
         };
@@ -600,7 +618,7 @@ 
                         dirstate_map::NodeData::None => (
                             Flags::empty(),
                             Entry {
-                                mode: 0.into(),
+                                _padding: 0.into(),
                                 size: 0.into(),
                                 mtime: 0.into(),
                             },
diff --git a/rust/hg-core/Cargo.toml b/rust/hg-core/Cargo.toml
--- a/rust/hg-core/Cargo.toml
+++ b/rust/hg-core/Cargo.toml
@@ -17,6 +17,7 @@ 
 im-rc = "15.0.*"
 itertools = "0.9"
 lazy_static = "1.4.0"
+libc = "0.2"
 rand = "0.7.3"
 rand_pcg = "0.2.1"
 rand_distr = "0.2.2"
diff --git a/rust/Cargo.lock b/rust/Cargo.lock
--- a/rust/Cargo.lock
+++ b/rust/Cargo.lock
@@ -1,5 +1,7 @@ 
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
+version = 3
+
 [[package]]
 name = "adler"
 version = "0.2.3"
@@ -386,6 +388,7 @@ 
  "im-rc",
  "itertools",
  "lazy_static",
+ "libc",
  "log",
  "memmap2",
  "micro-timer",
diff --git a/mercurial/helptext/internals/dirstate-v2.txt b/mercurial/helptext/internals/dirstate-v2.txt
--- a/mercurial/helptext/internals/dirstate-v2.txt
+++ b/mercurial/helptext/internals/dirstate-v2.txt
@@ -380,6 +380,9 @@ 
     P2_INFO = 1 << 2
     HAS_MODE_AND_SIZE = 1 << 3
     HAS_MTIME = 1 << 4
+    MODE_EXEC_PERM = 1 << 5
+    MODE_IS_SYMLINK = 1 << 7
+
 
   Other bits are unset. The meaning of these bits are:
 
@@ -414,14 +417,17 @@ 
   in order to optimize `hg status`
   by enabling it to skip `readdir` in more cases.
 
-  When a node is for a file tracked anywhere,
-  the rest of the node data is three fields:
+  When a node is for a file tracked anywhere:
+  - If `HAS_MODE_AND_SIZE` is set, the file is expected
+    to be a symbolic link or a normal file based on `MODE_IS_SYMLINK`.
+  - If `HAS_MODE_AND_SIZE` is set, the file’s owner is expected
+    to have execute permission or not based on `MODE_EXEC_PERM`.
+  - If `HAS_MODE_AND_SIZE` is unset,
+    the expected type of file and permission are unknown.
+  The rest of the node data is three fields:
 
   * Offset 31:
-    If `HAS_MODE_AND_SIZE` is unset, four zero bytes.
-    Otherwise, a 32-bit integer for the Unix mode (as in `stat_result.st_mode`)
-    expected for this file to be considered clean.
-    Only the `S_IXUSR` bit (owner has execute permission) is considered.
+    4 unused bytes, set to zero
 
   * Offset 35:
     If `HAS_MODE_AND_SIZE` is unset, four zero bytes.