dirstate-v2: Separate HAS_FILE_MTIME and HAS_DIRECTORY_MTIME flags
authorSimon Sapin <simon.sapin@octobus.net>
Thu, 14 Oct 2021 16:39:16 +0200
changeset 48232 f7fd629ffb98
parent 48231 0524c1359bfc
child 48233 c820866c52f9
dirstate-v2: Separate HAS_FILE_MTIME and HAS_DIRECTORY_MTIME flags Previously the same flag was used, with its meaning based on whether the node otherwise identifies a file tracked anywhere. In addition to being more explicit, this enables storing a directory mtime if a given path used to be tracked in a parent commit (so the dirstate still has data about it) but became a directory in the working copy. (However this is not done yet as it would require a larger change, replacing the `dirstate_map::NodeData` enum with struct fields.) Differential Revision: https://phab.mercurial-scm.org/D11662
mercurial/cext/parsers.c
mercurial/cext/util.h
mercurial/helptext/internals/dirstate-v2.txt
mercurial/pure/parsers.py
rust/hg-core/src/dirstate_tree/on_disk.rs
--- a/mercurial/cext/parsers.c	Thu Oct 14 16:06:31 2021 +0200
+++ b/mercurial/cext/parsers.c	Thu Oct 14 16:39:16 2021 +0200
@@ -119,7 +119,7 @@
 		t->size = 0;
 	}
 	if (has_meaningful_mtime) {
-		t->flags |= dirstate_flag_has_meaningful_mtime;
+		t->flags |= dirstate_flag_has_file_mtime;
 		t->mtime = mtime;
 	} else {
 		t->mtime = 0;
@@ -225,7 +225,7 @@
 {
 	if (dirstate_item_c_removed(self)) {
 		return 0;
-	} else if (!(self->flags & dirstate_flag_has_meaningful_mtime) ||
+	} else if (!(self->flags & dirstate_flag_has_file_mtime) ||
 	           !(self->flags & dirstate_flag_p1_tracked) ||
 	           !(self->flags & dirstate_flag_wc_tracked) ||
 	           (self->flags & dirstate_flag_p2_info)) {
@@ -334,7 +334,7 @@
 			t->flags = (dirstate_flag_wc_tracked |
 			            dirstate_flag_p1_tracked |
 			            dirstate_flag_has_meaningful_data |
-			            dirstate_flag_has_meaningful_mtime);
+			            dirstate_flag_has_file_mtime);
 			t->mode = mode;
 			t->size = size;
 			t->mtime = mtime;
@@ -395,7 +395,7 @@
    to make sure it is correct. */
 static PyObject *dirstate_item_set_possibly_dirty(dirstateItemObject *self)
 {
-	self->flags &= ~dirstate_flag_has_meaningful_mtime;
+	self->flags &= ~dirstate_flag_has_file_mtime;
 	Py_RETURN_NONE;
 }
 
@@ -409,7 +409,7 @@
 	}
 	self->flags = dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
 	              dirstate_flag_has_meaningful_data |
-	              dirstate_flag_has_meaningful_mtime;
+	              dirstate_flag_has_file_mtime;
 	self->mode = mode;
 	self->size = size;
 	self->mtime = mtime;
@@ -419,7 +419,7 @@
 static PyObject *dirstate_item_set_tracked(dirstateItemObject *self)
 {
 	self->flags |= dirstate_flag_wc_tracked;
-	self->flags &= ~dirstate_flag_has_meaningful_mtime;
+	self->flags &= ~dirstate_flag_has_file_mtime;
 	Py_RETURN_NONE;
 }
 
@@ -437,7 +437,7 @@
 	if (self->flags & dirstate_flag_p2_info) {
 		self->flags &= ~(dirstate_flag_p2_info |
 		                 dirstate_flag_has_meaningful_data |
-		                 dirstate_flag_has_meaningful_mtime);
+		                 dirstate_flag_has_file_mtime);
 		self->mode = 0;
 		self->mtime = 0;
 		self->size = 0;
--- a/mercurial/cext/util.h	Thu Oct 14 16:06:31 2021 +0200
+++ b/mercurial/cext/util.h	Thu Oct 14 16:39:16 2021 +0200
@@ -35,9 +35,10 @@
 static const unsigned char dirstate_flag_p1_tracked = 1 << 1;
 static const unsigned char dirstate_flag_p2_info = 1 << 2;
 static const unsigned char dirstate_flag_has_meaningful_data = 1 << 3;
-static const unsigned char dirstate_flag_has_meaningful_mtime = 1 << 4;
-static const unsigned char dirstate_flag_mode_exec_perm = 1 << 5;
-static const unsigned char dirstate_flag_mode_is_symlink = 1 << 6;
+static const unsigned char dirstate_flag_has_file_mtime = 1 << 4;
+static const unsigned char dirstate_flag_has_directory_mtime = 1 << 5;
+static const unsigned char dirstate_flag_mode_exec_perm = 1 << 6;
+static const unsigned char dirstate_flag_mode_is_symlink = 1 << 7;
 
 extern PyTypeObject dirstateItemType;
 #define dirstate_tuple_check(op) (Py_TYPE(op) == &dirstateItemType)
--- a/mercurial/helptext/internals/dirstate-v2.txt	Thu Oct 14 16:06:31 2021 +0200
+++ b/mercurial/helptext/internals/dirstate-v2.txt	Thu Oct 14 16:39:16 2021 +0200
@@ -379,9 +379,10 @@
     P1_TRACKED = 1 << 1
     P2_INFO = 1 << 2
     HAS_MODE_AND_SIZE = 1 << 3
-    HAS_MTIME = 1 << 4
-    MODE_EXEC_PERM = 1 << 5
-    MODE_IS_SYMLINK = 1 << 6
+    HAS_FILE_MTIME = 1 << 4
+    HAS_DIRECTORY_MTIME = 1 << 5
+    MODE_EXEC_PERM = 1 << 6
+    MODE_IS_SYMLINK = 1 << 7
 
   The meaning of each bit is described below.
 
@@ -401,10 +402,25 @@
   The seconds component of an `mtime` field described below,
   as a 32-bit integer.
   Unlike in dirstate-v1, negative values are not used.
+  When `mtime` is used, this is number of seconds since the Unix epoch
+  truncated to its lower 31 bits.
 
 * Offset 40:
   The nanoseconds component of an `mtime` field described below,
   as a 32-bit integer.
+  When `mtime` is used,
+  this is the number of nanoseconds since `mtime.seconds`,
+  always stritctly less than one billion.
+
+  This may be zero if more precision is not available.
+  (This can happen because of limitations in any of Mercurial, Python,
+  libc, the operating system, …)
+
+  When comparing two mtimes and either has this component set to zero,
+  the sub-second precision of both should be ignored.
+  False positives when checking mtime equality due to clock resolution
+  are always possible and the status algorithm needs to deal with them,
+  but having too many false negatives could be harmful too.
 
 * (Offset 44: end of this node)
 
@@ -454,21 +470,18 @@
     If this is unset the expected size, permission, and file type are unknown.
     The `size` field is unused (set to zero).
 
-`HAS_MTIME`
-    If unset, the `mtime` field is unused (set to zero).
-    If set, it contains a timestamp represented as
-    - the number of seconds since the Unix epoch,
-      truncated to its lower 31 bits.
-    - and the number of nanoseconds since `mtime.seconds`,
-      always stritctly less than one billion.
-      This may be zero if more precision is not available.
-      (This can happen because of limitations in any of Mercurial, Python,
-      libc, the operating system, …)
+`HAS_FILE_MTIME`
+    Must be unset for untracked nodes.
+    If this and `HAS_DIRECTORY_MTIME` are both unset,
+    the `mtime` field is unused (set to zero).
+    If this is set, `mtime` is the modification time
+    expected for the file to be considered clean.
 
-    If set for a file tracked anywhere,
-    `mtime` is the expected modification time for the file to be clean.
-
-    If set for an untracked node, at some point,
+`HAS_DIRECTORY_MTIME`
+    Must be unset for file tracked anywhere.
+    If this and `HAS_DIRECTORY_MTIME` are both unset,
+    the `mtime` field is unused (set to zero).
+    If this is set, at some point,
     this path in the working directory was observed:
 
     - To be a directory
--- a/mercurial/pure/parsers.py	Thu Oct 14 16:06:31 2021 +0200
+++ b/mercurial/pure/parsers.py	Thu Oct 14 16:39:16 2021 +0200
@@ -49,9 +49,10 @@
 DIRSTATE_V2_P1_TRACKED = 1 << 1
 DIRSTATE_V2_P2_INFO = 1 << 2
 DIRSTATE_V2_HAS_MODE_AND_SIZE = 1 << 3
-DIRSTATE_V2_HAS_MTIME = 1 << 4
-DIRSTATE_V2_MODE_EXEC_PERM = 1 << 5
-DIRSTATE_V2_MODE_IS_SYMLINK = 1 << 6
+DIRSTATE_V2_HAS_FILE_MTIME = 1 << 4
+_DIRSTATE_V2_HAS_DIRCTORY_MTIME = 1 << 5  # Unused when Rust is not available
+DIRSTATE_V2_MODE_EXEC_PERM = 1 << 6
+DIRSTATE_V2_MODE_IS_SYMLINK = 1 << 7
 
 
 @attr.s(slots=True, init=False)
@@ -138,7 +139,7 @@
             p1_tracked=bool(flags & DIRSTATE_V2_P1_TRACKED),
             p2_info=bool(flags & DIRSTATE_V2_P2_INFO),
             has_meaningful_data=has_mode_size,
-            has_meaningful_mtime=bool(flags & DIRSTATE_V2_HAS_MTIME),
+            has_meaningful_mtime=bool(flags & DIRSTATE_V2_HAS_FILE_MTIME),
             parentfiledata=(mode, size, mtime),
         )
 
@@ -329,7 +330,7 @@
             if stat.S_ISLNK(self.mode):
                 flags |= DIRSTATE_V2_MODE_IS_SYMLINK
         if self._mtime is not None:
-            flags |= DIRSTATE_V2_HAS_MTIME
+            flags |= DIRSTATE_V2_HAS_FILE_MTIME
         return (flags, self._size or 0, self._mtime or 0)
 
     def v1_state(self):
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs	Thu Oct 14 16:06:31 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs	Thu Oct 14 16:39:16 2021 +0200
@@ -106,9 +106,10 @@
         const P1_TRACKED = 1 << 1;
         const P2_INFO = 1 << 2;
         const HAS_MODE_AND_SIZE = 1 << 3;
-        const HAS_MTIME = 1 << 4;
-        const MODE_EXEC_PERM = 1 << 5;
-        const MODE_IS_SYMLINK = 1 << 6;
+        const HAS_FILE_MTIME = 1 << 4;
+        const HAS_DIRECTORY_MTIME = 1 << 5;
+        const MODE_EXEC_PERM = 1 << 6;
+        const MODE_IS_SYMLINK = 1 << 7;
     }
 }
 
@@ -320,13 +321,15 @@
     pub(super) fn cached_directory_mtime(
         &self,
     ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
-        Ok(
-            if self.flags().contains(Flags::HAS_MTIME) && !self.has_entry() {
-                Some(self.mtime.try_into()?)
+        if self.flags().contains(Flags::HAS_DIRECTORY_MTIME) {
+            if self.flags().contains(Flags::HAS_FILE_MTIME) {
+                Err(DirstateV2ParseError)
             } else {
-                None
-            },
-        )
+                Ok(Some(self.mtime.try_into()?))
+            }
+        } else {
+            Ok(None)
+        }
     }
 
     fn synthesize_unix_mode(&self) -> u32 {
@@ -353,7 +356,7 @@
         } else {
             None
         };
-        let mtime = if self.flags().contains(Flags::HAS_MTIME) {
+        let mtime = if self.flags().contains(Flags::HAS_FILE_MTIME) {
             Some(self.mtime.truncated_seconds.into())
         } else {
             None
@@ -422,7 +425,7 @@
             0.into()
         };
         let mtime = if let Some(m) = mtime_opt {
-            flags.insert(Flags::HAS_MTIME);
+            flags.insert(Flags::HAS_FILE_MTIME);
             PackedTruncatedTimestamp {
                 truncated_seconds: m.into(),
                 nanoseconds: 0.into(),
@@ -580,9 +583,11 @@
                         dirstate_map::NodeData::Entry(entry) => {
                             Node::from_dirstate_entry(entry)
                         }
-                        dirstate_map::NodeData::CachedDirectory { mtime } => {
-                            (Flags::HAS_MTIME, 0.into(), (*mtime).into())
-                        }
+                        dirstate_map::NodeData::CachedDirectory { mtime } => (
+                            Flags::HAS_DIRECTORY_MTIME,
+                            0.into(),
+                            (*mtime).into(),
+                        ),
                         dirstate_map::NodeData::None => (
                             Flags::empty(),
                             0.into(),