dirstate-v2: adds a flag to mark a file as modified
authorSimon Sapin <simon.sapin@octobus.net>
Fri, 15 Oct 2021 16:12:00 +0200
changeset 48250 1730b2fceaa1
parent 48249 e9faae0f445c
child 48251 dfc5a505ddc5
dirstate-v2: adds a flag to mark a file as modified Right now, a files with a file system state that requires a lookup (same size, different mtime) will requires a lookup. If the result of that lookup is a modified files, it will remains ambiguous, requiring a lookup on the next status run too. To fix this, we introduce a dedicated flag in the new format. Such flag will allow to record such file as "known modified" avoiding an extra lookup later. As None of the associate code currently exist in the status code, we do the minimal implementation: if we read a dirstate entry with this flag set, we make it as "ambiguous" so that the next status code has to look it up. The same as it would have to without this flag existing anyway. Differential Revision: https://phab.mercurial-scm.org/D11681
mercurial/cext/parsers.c
mercurial/cext/util.h
mercurial/helptext/internals/dirstate-v2.txt
mercurial/pure/parsers.py
rust/hg-core/src/dirstate_tree/on_disk.rs
--- a/mercurial/cext/parsers.c	Tue Oct 19 10:52:13 2021 +0100
+++ b/mercurial/cext/parsers.c	Fri Oct 15 16:12:00 2021 +0200
@@ -139,18 +139,16 @@
 
 static inline bool dirstate_item_c_any_tracked(dirstateItemObject *self)
 {
-	const unsigned char mask = dirstate_flag_wc_tracked |
-	                           dirstate_flag_p1_tracked |
-	                           dirstate_flag_p2_info;
+	const int mask = dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
+	                 dirstate_flag_p2_info;
 	return (self->flags & mask);
 }
 
 static inline bool dirstate_item_c_added(dirstateItemObject *self)
 {
-	const unsigned char mask =
-	    (dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
-	     dirstate_flag_p2_info);
-	const unsigned char target = dirstate_flag_wc_tracked;
+	const int mask = (dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
+	                  dirstate_flag_p2_info);
+	const int target = dirstate_flag_wc_tracked;
 	return (self->flags & mask) == target;
 }
 
@@ -237,7 +235,7 @@
 
 static PyObject *dirstate_item_v2_data(dirstateItemObject *self)
 {
-	unsigned char flags = self->flags;
+	int flags = self->flags;
 	int mode = dirstate_item_c_v1_mode(self);
 	if ((mode & S_IXUSR) != 0) {
 		flags |= dirstate_flag_mode_exec_perm;
@@ -249,7 +247,7 @@
 	} else {
 		flags &= ~dirstate_flag_mode_is_symlink;
 	}
-	return Py_BuildValue("Bii", flags, self->size, self->mtime);
+	return Py_BuildValue("iii", flags, self->size, self->mtime);
 };
 
 static PyObject *dirstate_item_v1_state(dirstateItemObject *self)
@@ -372,9 +370,14 @@
 	if (!t) {
 		return NULL;
 	}
-	if (!PyArg_ParseTuple(args, "bii", &t->flags, &t->size, &t->mtime)) {
+	if (!PyArg_ParseTuple(args, "iii", &t->flags, &t->size, &t->mtime)) {
 		return NULL;
 	}
+	if (t->flags & dirstate_flag_expected_state_is_modified) {
+		t->flags &= ~(dirstate_flag_expected_state_is_modified |
+		              dirstate_flag_has_meaningful_data |
+		              dirstate_flag_has_file_mtime);
+	}
 	t->mode = 0;
 	if (t->flags & dirstate_flag_has_meaningful_data) {
 		if (t->flags & dirstate_flag_mode_exec_perm) {
--- a/mercurial/cext/util.h	Tue Oct 19 10:52:13 2021 +0100
+++ b/mercurial/cext/util.h	Fri Oct 15 16:12:00 2021 +0200
@@ -24,21 +24,22 @@
 /* clang-format off */
 typedef struct {
 	PyObject_HEAD
-	unsigned char flags;
+	int flags;
 	int mode;
 	int size;
 	int mtime;
 } dirstateItemObject;
 /* clang-format on */
 
-static const unsigned char dirstate_flag_wc_tracked = 1;
-static const unsigned char dirstate_flag_p1_tracked = 1 << 1;
-static const unsigned char dirstate_flag_p2_info = 1 << 2;
-static const unsigned char dirstate_flag_has_meaningful_data = 1 << 3;
-static const unsigned char dirstate_flag_has_file_mtime = 1 << 4;
-static const unsigned char dirstate_flag_has_directory_mtime = 1 << 5;
-static const unsigned char dirstate_flag_mode_exec_perm = 1 << 6;
-static const unsigned char dirstate_flag_mode_is_symlink = 1 << 7;
+static const int dirstate_flag_wc_tracked = 1;
+static const int dirstate_flag_p1_tracked = 1 << 1;
+static const int dirstate_flag_p2_info = 1 << 2;
+static const int dirstate_flag_has_meaningful_data = 1 << 3;
+static const int dirstate_flag_has_file_mtime = 1 << 4;
+static const int dirstate_flag_has_directory_mtime = 1 << 5;
+static const int dirstate_flag_mode_exec_perm = 1 << 6;
+static const int dirstate_flag_mode_is_symlink = 1 << 7;
+static const int dirstate_flag_expected_state_is_modified = 1 << 8;
 
 extern PyTypeObject dirstateItemType;
 #define dirstate_tuple_check(op) (Py_TYPE(op) == &dirstateItemType)
--- a/mercurial/helptext/internals/dirstate-v2.txt	Tue Oct 19 10:52:13 2021 +0100
+++ b/mercurial/helptext/internals/dirstate-v2.txt	Fri Oct 15 16:12:00 2021 +0200
@@ -383,6 +383,7 @@
     HAS_DIRECTORY_MTIME = 1 << 5
     MODE_EXEC_PERM = 1 << 6
     MODE_IS_SYMLINK = 1 << 7
+    EXPECTED_STATE_IS_MODIFIED = 1 << 8
 
   The meaning of each bit is described below.
 
@@ -461,8 +462,7 @@
     Must be unset for untracked nodes.
     For files tracked anywhere, if this is set:
     - The `size` field is the expected file size,
-      in bytes truncated its lower to 31 bits,
-      for the file to be clean.
+      in bytes truncated its lower to 31 bits.
     - The expected execute permission for the file’s owner
       is given by `MODE_EXEC_PERM`
     - The expected file type is given by `MODE_IS_SIMLINK`:
@@ -474,8 +474,7 @@
     Must be unset for untracked nodes.
     If this and `HAS_DIRECTORY_MTIME` are both unset,
     the `mtime` field is unused (set to zero).
-    If this is set, `mtime` is the modification time
-    expected for the file to be considered clean.
+    If this is set, `mtime` is the expected modification time.
 
 `HAS_DIRECTORY_MTIME`
     Must be unset for file tracked anywhere.
@@ -514,3 +513,20 @@
     If `HAS_MODE_AND_SIZE` is set,
     this indicates whether the file is expected to be a symlink
     as opposed to a normal file.
+
+`EXPECTED_STATE_IS_MODIFIED`
+    Must be unset for untracked nodes.
+    For:
+    - a file tracked anywhere
+    - that has expected metadata (`HAS_MODE_AND_SIZE` and `HAS_FILE_MTIME`)
+    - if that metadata matches
+      metadata found in the working directory with `stat`
+    This bit indicates the status of the file.
+    If set, the status is modified. If unset, it is clean.
+
+    In cases where `hg status` needs to read the contents of a file
+    because metadata is ambiguous, this bit lets it record the result
+    if the result is modified so that a future run of `hg status`
+    does not need to do the same again.
+    It is valid to never set this bit,
+    and consider expected metadata ambiguous if it is set.
--- a/mercurial/pure/parsers.py	Tue Oct 19 10:52:13 2021 +0100
+++ b/mercurial/pure/parsers.py	Fri Oct 15 16:12:00 2021 +0200
@@ -53,6 +53,7 @@
 _DIRSTATE_V2_HAS_DIRCTORY_MTIME = 1 << 5  # Unused when Rust is not available
 DIRSTATE_V2_MODE_EXEC_PERM = 1 << 6
 DIRSTATE_V2_MODE_IS_SYMLINK = 1 << 7
+DIRSTATE_V2_EXPECTED_STATE_IS_MODIFIED = 1 << 8
 
 
 @attr.s(slots=True, init=False)
@@ -123,7 +124,15 @@
     def from_v2_data(cls, flags, size, mtime):
         """Build a new DirstateItem object from V2 data"""
         has_mode_size = bool(flags & DIRSTATE_V2_HAS_MODE_AND_SIZE)
+        has_meaningful_mtime = bool(flags & DIRSTATE_V2_HAS_FILE_MTIME)
         mode = None
+
+        if flags & +DIRSTATE_V2_EXPECTED_STATE_IS_MODIFIED:
+            # we do not have support for this flag in the code yet,
+            # force a lookup for this file.
+            has_mode_size = False
+            has_meaningful_mtime = False
+
         if has_mode_size:
             assert stat.S_IXUSR == 0o100
             if flags & DIRSTATE_V2_MODE_EXEC_PERM:
@@ -139,7 +148,7 @@
             p1_tracked=bool(flags & DIRSTATE_V2_P1_TRACKED),
             p2_info=bool(flags & DIRSTATE_V2_P2_INFO),
             has_meaningful_data=has_mode_size,
-            has_meaningful_mtime=bool(flags & DIRSTATE_V2_HAS_FILE_MTIME),
+            has_meaningful_mtime=has_meaningful_mtime,
             parentfiledata=(mode, size, mtime),
         )
 
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs	Tue Oct 19 10:52:13 2021 +0100
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs	Fri Oct 15 16:12:00 2021 +0200
@@ -110,6 +110,7 @@
         const HAS_DIRECTORY_MTIME = 1 << 5;
         const MODE_EXEC_PERM = 1 << 6;
         const MODE_IS_SYMLINK = 1 << 7;
+        const EXPECTED_STATE_IS_MODIFIED = 1 << 8;
     }
 }
 
@@ -351,12 +352,16 @@
         let wdir_tracked = self.flags().contains(Flags::WDIR_TRACKED);
         let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
         let p2_info = self.flags().contains(Flags::P2_INFO);
-        let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE) {
+        let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
+            && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
+        {
             Some((self.synthesize_unix_mode(), self.size.into()))
         } else {
             None
         };
-        let mtime = if self.flags().contains(Flags::HAS_FILE_MTIME) {
+        let mtime = if self.flags().contains(Flags::HAS_FILE_MTIME)
+            && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
+        {
             Some(self.mtime.truncated_seconds.into())
         } else {
             None