dirstate-v2: Replace the 32-bit `mode` field with two bits
authorSimon Sapin <simon.sapin@octobus.net>
Tue, 12 Oct 2021 17:57:57 +0200
changeset 48195 4d5a13253d34
parent 48194 1000db4a71f1
child 48196 47fabca85457
dirstate-v2: Replace the 32-bit `mode` field with two bits Previously we stored the entire value from `stat_result.st_mode`, like dirstate-v1 does. However only the executable permission and type of file (only symbolic links and normal files are supported) are relevant to Mecurial. So replace this field with two bits in the existing bitfield byte. For now the unused space is left as padding, as it will be used for something else soon. Differential Revision: https://phab.mercurial-scm.org/D11635
mercurial/helptext/internals/dirstate-v2.txt
rust/Cargo.lock
rust/hg-core/Cargo.toml
rust/hg-core/src/dirstate_tree/on_disk.rs
rust/hg-cpython/Cargo.toml
--- a/mercurial/helptext/internals/dirstate-v2.txt	Mon Oct 11 18:37:21 2021 +0200
+++ b/mercurial/helptext/internals/dirstate-v2.txt	Tue Oct 12 17:57:57 2021 +0200
@@ -380,6 +380,9 @@
     P2_INFO = 1 << 2
     HAS_MODE_AND_SIZE = 1 << 3
     HAS_MTIME = 1 << 4
+    MODE_EXEC_PERM = 1 << 5
+    MODE_IS_SYMLINK = 1 << 7
+
 
   Other bits are unset. The meaning of these bits are:
 
@@ -414,14 +417,17 @@
   in order to optimize `hg status`
   by enabling it to skip `readdir` in more cases.
 
-  When a node is for a file tracked anywhere,
-  the rest of the node data is three fields:
+  When a node is for a file tracked anywhere:
+  - If `HAS_MODE_AND_SIZE` is set, the file is expected
+    to be a symbolic link or a normal file based on `MODE_IS_SYMLINK`.
+  - If `HAS_MODE_AND_SIZE` is set, the file’s owner is expected
+    to have execute permission or not based on `MODE_EXEC_PERM`.
+  - If `HAS_MODE_AND_SIZE` is unset,
+    the expected type of file and permission are unknown.
+  The rest of the node data is three fields:
 
   * Offset 31:
-    If `HAS_MODE_AND_SIZE` is unset, four zero bytes.
-    Otherwise, a 32-bit integer for the Unix mode (as in `stat_result.st_mode`)
-    expected for this file to be considered clean.
-    Only the `S_IXUSR` bit (owner has execute permission) is considered.
+    4 unused bytes, set to zero
 
   * Offset 35:
     If `HAS_MODE_AND_SIZE` is unset, four zero bytes.
--- a/rust/Cargo.lock	Mon Oct 11 18:37:21 2021 +0200
+++ b/rust/Cargo.lock	Tue Oct 12 17:57:57 2021 +0200
@@ -1,5 +1,7 @@
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
+version = 3
+
 [[package]]
 name = "adler"
 version = "0.2.3"
@@ -386,6 +388,7 @@
  "im-rc",
  "itertools",
  "lazy_static",
+ "libc",
  "log",
  "memmap2",
  "micro-timer",
--- a/rust/hg-core/Cargo.toml	Mon Oct 11 18:37:21 2021 +0200
+++ b/rust/hg-core/Cargo.toml	Tue Oct 12 17:57:57 2021 +0200
@@ -17,6 +17,7 @@
 im-rc = "15.0.*"
 itertools = "0.9"
 lazy_static = "1.4.0"
+libc = "0.2"
 rand = "0.7.3"
 rand_pcg = "0.2.1"
 rand_distr = "0.2.2"
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs	Mon Oct 11 18:37:21 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs	Tue Oct 12 17:57:57 2021 +0200
@@ -107,13 +107,15 @@
         const P2_INFO = 1 << 2;
         const HAS_MODE_AND_SIZE = 1 << 3;
         const HAS_MTIME = 1 << 4;
+        const MODE_EXEC_PERM = 1 << 5;
+        const MODE_IS_SYMLINK = 1 << 7;
     }
 }
 
 #[derive(BytesCast, Copy, Clone, Debug)]
 #[repr(C)]
 struct Entry {
-    mode: U32Be,
+    _padding: U32Be,
     size: U32Be,
     mtime: U32Be,
 }
@@ -332,13 +334,27 @@
         )
     }
 
+    fn synthesize_unix_mode(&self) -> u32 {
+        let file_type = if self.flags.contains(Flags::MODE_IS_SYMLINK) {
+            libc::S_IFLNK
+        } else {
+            libc::S_IFREG
+        };
+        let permisions = if self.flags.contains(Flags::MODE_EXEC_PERM) {
+            0o755
+        } else {
+            0o644
+        };
+        file_type | permisions
+    }
+
     fn assume_entry(&self) -> DirstateEntry {
         // TODO: convert through raw bits instead?
         let wdir_tracked = self.flags.contains(Flags::WDIR_TRACKED);
         let p1_tracked = self.flags.contains(Flags::P1_TRACKED);
         let p2_info = self.flags.contains(Flags::P2_INFO);
         let mode_size = if self.flags.contains(Flags::HAS_MODE_AND_SIZE) {
-            Some((self.data.mode.into(), self.data.size.into()))
+            Some((self.synthesize_unix_mode(), self.data.size.into()))
         } else {
             None
         };
@@ -400,13 +416,15 @@
         flags.set(Flags::WDIR_TRACKED, wdir_tracked);
         flags.set(Flags::P1_TRACKED, p1_tracked);
         flags.set(Flags::P2_INFO, p2_info);
-        let (mode, size, mtime);
+        let (size, mtime);
         if let Some((m, s)) = mode_size_opt {
-            mode = m;
+            let exec_perm = m & libc::S_IXUSR != 0;
+            let is_symlink = m & libc::S_IFMT == libc::S_IFLNK;
+            flags.set(Flags::MODE_EXEC_PERM, exec_perm);
+            flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
             size = s;
             flags.insert(Flags::HAS_MODE_AND_SIZE)
         } else {
-            mode = 0;
             size = 0;
         }
         if let Some(m) = mtime_opt {
@@ -416,7 +434,7 @@
             mtime = 0;
         }
         let raw_entry = Entry {
-            mode: mode.into(),
+            _padding: 0.into(),
             size: size.into(),
             mtime: mtime.into(),
         };
@@ -600,7 +618,7 @@
                         dirstate_map::NodeData::None => (
                             Flags::empty(),
                             Entry {
-                                mode: 0.into(),
+                                _padding: 0.into(),
                                 size: 0.into(),
                                 mtime: 0.into(),
                             },
--- a/rust/hg-cpython/Cargo.toml	Mon Oct 11 18:37:21 2021 +0200
+++ b/rust/hg-cpython/Cargo.toml	Tue Oct 12 17:57:57 2021 +0200
@@ -23,7 +23,7 @@
 [dependencies]
 crossbeam-channel = "0.4"
 hg-core = { path = "../hg-core"}
-libc = '*'
+libc = "0.2"
 log = "0.4.8"
 env_logger = "0.7.1"
 stable_deref_trait = "1.2.0"