dirstate-v2: adjust the meaning of directory flags
authorPierre-Yves David <pierre-yves.david@octobus.net>
Tue, 19 Oct 2021 18:18:05 +0200
changeset 48264 bb240915f69f
parent 48263 83d0bd45b662
child 48265 3861e3f6ad54
dirstate-v2: adjust the meaning of directory flags Tracking directory "explicitly" give use the opportunity to distinct between entry that are untracked because they are part of the directory structure and entry that are ignored/unknown files on the files system. The help is adjusted to the new semantic and the code now comply to it for both read and write. Differential Revision: https://phab.mercurial-scm.org/D11694
mercurial/cext/parsers.c
mercurial/cext/util.h
mercurial/dirstateutils/v2.py
mercurial/helptext/internals/dirstate-v2.txt
mercurial/pure/parsers.py
rust/hg-core/src/dirstate_tree/on_disk.rs
--- a/mercurial/cext/parsers.c	Wed Oct 13 15:58:14 2021 +0200
+++ b/mercurial/cext/parsers.c	Tue Oct 19 18:18:05 2021 +0200
@@ -129,7 +129,7 @@
 		t->size = 0;
 	}
 	if (has_meaningful_mtime) {
-		t->flags |= dirstate_flag_has_file_mtime;
+		t->flags |= dirstate_flag_has_mtime;
 		t->mtime_s = mtime_s;
 		t->mtime_ns = mtime_ns;
 	} else {
@@ -246,7 +246,7 @@
 {
 	if (dirstate_item_c_removed(self)) {
 		return 0;
-	} else if (!(self->flags & dirstate_flag_has_file_mtime) ||
+	} else if (!(self->flags & dirstate_flag_has_mtime) ||
 	           !(self->flags & dirstate_flag_p1_tracked) ||
 	           !(self->flags & dirstate_flag_wc_tracked) ||
 	           (self->flags & dirstate_flag_p2_info)) {
@@ -318,7 +318,7 @@
 	if (!PyArg_ParseTuple(other, "ii", &other_s, &other_ns)) {
 		return NULL;
 	}
-	if ((self->flags & dirstate_flag_has_file_mtime) &&
+	if ((self->flags & dirstate_flag_has_mtime) &&
 	    self->mtime_s == other_s &&
 	    (self->mtime_ns == other_ns || self->mtime_ns == 0 ||
 	     other_ns == 0)) {
@@ -375,7 +375,7 @@
 			t->flags = (dirstate_flag_wc_tracked |
 			            dirstate_flag_p1_tracked |
 			            dirstate_flag_has_meaningful_data |
-			            dirstate_flag_has_file_mtime);
+			            dirstate_flag_has_mtime);
 			t->mode = mode;
 			t->size = size;
 			t->mtime_s = mtime;
@@ -420,7 +420,7 @@
 	if (t->flags & dirstate_flag_expected_state_is_modified) {
 		t->flags &= ~(dirstate_flag_expected_state_is_modified |
 		              dirstate_flag_has_meaningful_data |
-		              dirstate_flag_has_file_mtime);
+		              dirstate_flag_has_mtime);
 	}
 	if (t->flags & dirstate_flag_mtime_second_ambiguous) {
 		/* The current code is not able to do the more subtle comparison
@@ -428,7 +428,7 @@
 		 * mtime */
 		t->flags &= ~(dirstate_flag_mtime_second_ambiguous |
 		              dirstate_flag_has_meaningful_data |
-		              dirstate_flag_has_file_mtime);
+		              dirstate_flag_has_mtime);
 	}
 	t->mode = 0;
 	if (t->flags & dirstate_flag_has_meaningful_data) {
@@ -450,7 +450,7 @@
    to make sure it is correct. */
 static PyObject *dirstate_item_set_possibly_dirty(dirstateItemObject *self)
 {
-	self->flags &= ~dirstate_flag_has_file_mtime;
+	self->flags &= ~dirstate_flag_has_mtime;
 	Py_RETURN_NONE;
 }
 
@@ -465,7 +465,7 @@
 	}
 	self->flags = dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
 	              dirstate_flag_has_meaningful_data |
-	              dirstate_flag_has_file_mtime;
+	              dirstate_flag_has_mtime;
 	self->mode = mode;
 	self->size = size;
 	self->mtime_s = mtime_s;
@@ -476,7 +476,7 @@
 static PyObject *dirstate_item_set_tracked(dirstateItemObject *self)
 {
 	self->flags |= dirstate_flag_wc_tracked;
-	self->flags &= ~dirstate_flag_has_file_mtime;
+	self->flags &= ~dirstate_flag_has_mtime;
 	Py_RETURN_NONE;
 }
 
@@ -495,7 +495,7 @@
 	if (self->flags & dirstate_flag_p2_info) {
 		self->flags &= ~(dirstate_flag_p2_info |
 		                 dirstate_flag_has_meaningful_data |
-		                 dirstate_flag_has_file_mtime);
+		                 dirstate_flag_has_mtime);
 		self->mode = 0;
 		self->size = 0;
 		self->mtime_s = 0;
--- a/mercurial/cext/util.h	Wed Oct 13 15:58:14 2021 +0200
+++ b/mercurial/cext/util.h	Tue Oct 19 18:18:05 2021 +0200
@@ -36,8 +36,8 @@
 static const int dirstate_flag_p1_tracked = 1 << 1;
 static const int dirstate_flag_p2_info = 1 << 2;
 static const int dirstate_flag_has_meaningful_data = 1 << 3;
-static const int dirstate_flag_has_file_mtime = 1 << 4;
-static const int dirstate_flag_has_directory_mtime = 1 << 5;
+static const int dirstate_flag_has_mtime = 1 << 4;
+static const int dirstate_flag_directory = 1 << 5;
 static const int dirstate_flag_mode_exec_perm = 1 << 6;
 static const int dirstate_flag_mode_is_symlink = 1 << 7;
 static const int dirstate_flag_expected_state_is_modified = 1 << 8;
--- a/mercurial/dirstateutils/v2.py	Wed Oct 13 15:58:14 2021 +0200
+++ b/mercurial/dirstateutils/v2.py	Tue Oct 19 18:18:05 2021 +0200
@@ -56,6 +56,9 @@
 assert TREE_METADATA_SIZE == TREE_METADATA.size
 assert NODE_SIZE == NODE.size
 
+# match constant in mercurial/pure/parsers.py
+DIRSTATE_V2_DIRECTORY = 1 << 5
+
 
 def parse_dirstate(map, copy_map, data, tree_metadata):
     """parse a full v2-dirstate from a binary data into dictionnaries:
@@ -83,7 +86,7 @@
     This is used by parse_dirstate to recursively fill `map` and `copy_map`.
 
     All directory specific information is ignored and do not need any
-    processing (HAS_DIRECTORY_MTIME, ALL_UNKNOWN_RECORDED, ALL_IGNORED_RECORDED)
+    processing (DIRECTORY, ALL_UNKNOWN_RECORDED, ALL_IGNORED_RECORDED)
     """
     for i in range(len):
         node_start = start + NODE_SIZE * i
@@ -150,7 +153,7 @@
             flags, size, mtime_s, mtime_ns = entry.v2_data()
         else:
             # There are no mtime-cached directories in the Python implementation
-            flags = 0
+            flags = DIRSTATE_V2_DIRECTORY
             size = 0
             mtime_s = 0
             mtime_ns = 0
--- a/mercurial/helptext/internals/dirstate-v2.txt	Wed Oct 13 15:58:14 2021 +0200
+++ b/mercurial/helptext/internals/dirstate-v2.txt	Tue Oct 19 18:18:05 2021 +0200
@@ -379,8 +379,8 @@
     P1_TRACKED = 1 << 1
     P2_INFO = 1 << 2
     HAS_MODE_AND_SIZE = 1 << 3
-    HAS_FILE_MTIME = 1 << 4
-    HAS_DIRECTORY_MTIME = 1 << 5
+    HAS_MTIME = 1 << 4
+    DIRECTORY = 1 << 5
     MODE_EXEC_PERM = 1 << 6
     MODE_IS_SYMLINK = 1 << 7
     EXPECTED_STATE_IS_MODIFIED = 1 << 8
@@ -477,37 +477,45 @@
     If this is unset the expected size, permission, and file type are unknown.
     The `size` field is unused (set to zero).
 
-`HAS_FILE_MTIME`
-    Must be unset for untracked nodes.
-    If this and `HAS_DIRECTORY_MTIME` are both unset,
-    the `mtime` field is unused (set to zero).
-    If this is set, `mtime` is the expected modification time.
+`HAS_MTIME`
+    The nodes contains a "valid" last modification time in the `mtime` field.
+
 
-`HAS_DIRECTORY_MTIME`
-    Must be unset for file tracked anywhere.
-    If this and `HAS_DIRECTORY_MTIME` are both unset,
-    the `mtime` field is unused (set to zero).
-    If this is set, at some point,
-    this path in the working directory was observed:
-
-    - To be a directory
-    - With the modification time given in `mtime`
-    - That time was already strictly in the past when observed,
-      meaning that later changes cannot happen in the same clock tick
-      and must cause a different modification time
-      (unless the system clock jumps back and we get unlucky,
-      which is not impossible but deemed unlikely enough).
-    - All direct children of this directory
-      (as returned by `std::fs::read_dir`)
-      either have a corresponding dirstate node,
-      or are ignored by ignore patterns whose hash is in tree metadata.
+    It means the `mtime` was already strictly in the past when observed,
+    meaning that later changes cannot happen in the same clock tick
+    and must cause a different modification time
+    (unless the system clock jumps back and we get unlucky,
+    which is not impossible but deemed unlikely enough).
 
     This means that if `std::fs::symlink_metadata` later reports
     the same modification time
     and ignored patterns haven’t changed,
-    a run of status that is not listing ignored files
-    can skip calling `std::fs::read_dir` again for this directory,
+    we can assume the node to be unchanged on disk.
+
+    The `mtime` field can then be used to skip more expensive lookup when
+    checking the status of "tracked" nodes.
+
+    It can also be set for node where `DIRECTORY` is set.
+    See `DIRECTORY` documentation for details.
+
+`DIRECTORY`
+    When set, this entry will match a directory that exists or existed on the
+    file system.
+
+    * When `HAS_MTIME` is set a directory has been seen on the file system and
+      `mtime` matches its last modificiation time. However, `HAS_MTIME` not being set
+      does not indicate the lack of directory on the file system.
+
+    * When not tracked anywhere, this node does not represent an ignored or
+      unknown file on disk.
+
+    If `HAS_MTIME` is set
+    and `mtime` matches the last modification time of the directory on disk,
+    the directory is unchanged
+    and we can skip calling `std::fs::read_dir` again for this directory,
     and iterate child dirstate nodes instead.
+    (as long as `ALL_UNKNOWN_RECORDED` and `ALL_IGNORED_RECORDED` are taken
+    into account)
 
 `MODE_EXEC_PERM`
     Must be unset if `HAS_MODE_AND_SIZE` is unset.
@@ -525,7 +533,7 @@
     Must be unset for untracked nodes.
     For:
     - a file tracked anywhere
-    - that has expected metadata (`HAS_MODE_AND_SIZE` and `HAS_FILE_MTIME`)
+    - that has expected metadata (`HAS_MODE_AND_SIZE` and `HAS_MTIME`)
     - if that metadata matches
       metadata found in the working directory with `stat`
     This bit indicates the status of the file.
@@ -541,7 +549,7 @@
 `ALL_UNKNOWN_RECORDED`
     If set, all "unknown" children existing on disk (at the time of the last
     status) have been recorded and the `mtime` associated with
-    `HAS_DIRECTORY_MTIME` can be used for optimization even when "unknown" file
+    `DIRECTORY` can be used for optimization even when "unknown" file
     are listed.
 
     Note that the amount recorded "unknown" children can still be zero if None
@@ -554,7 +562,7 @@
 `ALL_IGNORED_RECORDED`
     If set, all "ignored" children existing on disk (at the time of the last
     status) have been recorded and the `mtime` associated with
-    `HAS_DIRECTORY_MTIME` can be used for optimization even when "ignored" file
+    `DIRECTORY` can be used for optimization even when "ignored" file
     are listed.
 
     Note that the amount recorded "ignored" children can still be zero if None
--- a/mercurial/pure/parsers.py	Wed Oct 13 15:58:14 2021 +0200
+++ b/mercurial/pure/parsers.py	Tue Oct 19 18:18:05 2021 +0200
@@ -49,8 +49,8 @@
 DIRSTATE_V2_P1_TRACKED = 1 << 1
 DIRSTATE_V2_P2_INFO = 1 << 2
 DIRSTATE_V2_HAS_MODE_AND_SIZE = 1 << 3
-DIRSTATE_V2_HAS_FILE_MTIME = 1 << 4
-_DIRSTATE_V2_HAS_DIRCTORY_MTIME = 1 << 5  # Unused when Rust is not available
+DIRSTATE_V2_HAS_MTIME = 1 << 4
+DIRSTATE_V2_DIRECTORY = 1 << 5
 DIRSTATE_V2_MODE_EXEC_PERM = 1 << 6
 DIRSTATE_V2_MODE_IS_SYMLINK = 1 << 7
 DIRSTATE_V2_EXPECTED_STATE_IS_MODIFIED = 1 << 8
@@ -140,7 +140,7 @@
     def from_v2_data(cls, flags, size, mtime_s, mtime_ns):
         """Build a new DirstateItem object from V2 data"""
         has_mode_size = bool(flags & DIRSTATE_V2_HAS_MODE_AND_SIZE)
-        has_meaningful_mtime = bool(flags & DIRSTATE_V2_HAS_FILE_MTIME)
+        has_meaningful_mtime = bool(flags & DIRSTATE_V2_HAS_MTIME)
         if flags & DIRSTATE_V2_MTIME_SECOND_AMBIGUOUS:
             # The current code is not able to do the more subtle comparison that the
             # MTIME_SECOND_AMBIGUOUS requires. So we ignore the mtime
@@ -462,7 +462,7 @@
             if stat.S_ISLNK(self.mode):
                 flags |= DIRSTATE_V2_MODE_IS_SYMLINK
         if self._mtime_s is not None:
-            flags |= DIRSTATE_V2_HAS_FILE_MTIME
+            flags |= DIRSTATE_V2_HAS_MTIME
 
         if self._fallback_exec is not None:
             flags |= DIRSTATE_V2_HAS_FALLBACK_EXEC
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs	Wed Oct 13 15:58:14 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs	Tue Oct 19 18:18:05 2021 +0200
@@ -106,8 +106,8 @@
         const P1_TRACKED = 1 << 1;
         const P2_INFO = 1 << 2;
         const HAS_MODE_AND_SIZE = 1 << 3;
-        const HAS_FILE_MTIME = 1 << 4;
-        const HAS_DIRECTORY_MTIME = 1 << 5;
+        const HAS_MTIME = 1 << 4;
+        const DIRECTORY = 1 << 5;
         const MODE_EXEC_PERM = 1 << 6;
         const MODE_IS_SYMLINK = 1 << 7;
         const EXPECTED_STATE_IS_MODIFIED = 1 << 8;
@@ -329,16 +329,14 @@
     pub(super) fn cached_directory_mtime(
         &self,
     ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
-        // For now we do not have code to handle ALL_UNKNOWN_RECORDED, so we
-        // ignore the mtime if the flag is set.
-        if self.flags().contains(Flags::HAS_DIRECTORY_MTIME)
+        // For now we do not have code to handle the absence of
+        // ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is
+        // unset.
+        if self.flags().contains(Flags::DIRECTORY)
+            && self.flags().contains(Flags::HAS_MTIME)
             && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
         {
-            if self.flags().contains(Flags::HAS_FILE_MTIME) {
-                Err(DirstateV2ParseError)
-            } else {
-                Ok(Some(self.mtime.try_into()?))
-            }
+            Ok(Some(self.mtime.try_into()?))
         } else {
             Ok(None)
         }
@@ -370,7 +368,8 @@
         } else {
             None
         };
-        let mtime = if self.flags().contains(Flags::HAS_FILE_MTIME)
+        let mtime = if self.flags().contains(Flags::HAS_MTIME)
+            && !self.flags().contains(Flags::DIRECTORY)
             && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
             // The current code is not able to do the more subtle comparison that the
             // MTIME_SECOND_AMBIGUOUS requires. So we ignore the mtime
@@ -453,7 +452,7 @@
             0.into()
         };
         let mtime = if let Some(m) = mtime_opt {
-            flags.insert(Flags::HAS_FILE_MTIME);
+            flags.insert(Flags::HAS_MTIME);
             m.into()
         } else {
             PackedTruncatedTimestamp::null()
@@ -631,13 +630,14 @@
                             // We never set ALL_IGNORED_RECORDED since we
                             // don't track that case
                             // currently.
-                            Flags::HAS_DIRECTORY_MTIME
+                            Flags::DIRECTORY
+                                | Flags::HAS_MTIME
                                 | Flags::ALL_UNKNOWN_RECORDED,
                             0.into(),
                             (*mtime).into(),
                         ),
                         dirstate_map::NodeData::None => (
-                            Flags::empty(),
+                            Flags::DIRECTORY,
                             0.into(),
                             PackedTruncatedTimestamp::null(),
                         ),