dirstate: remove need_delay logic
authorPierre-Yves David <pierre-yves.david@octobus.net>
Fri, 19 Nov 2021 03:04:42 +0100
changeset 48392 434de12918fd
parent 48391 b80e5e75d51e
child 48393 1a8a70b4b0ad
dirstate: remove need_delay logic Now that allĀ¹ stored mtime are non ambiguous, we no longer need to apply the `need_delay` step. The need delay logic was not great are mtime gathered during longer operation could be ambiguous but younger than the `dirstate.write` call time. So, we don't need that logic anymore and can drop it This make the code much simpler. The code related to the test extension faking the dirstate write is now obsolete and associated test will be migrated as follow up. They currently do not break. [1] except the ones from `hg update`, but `need_delay` no longer help for them either. Differential Revision: https://phab.mercurial-scm.org/D11796
mercurial/cext/parsers.c
mercurial/dirstate.py
mercurial/dirstatemap.py
mercurial/dirstateutils/v2.py
mercurial/pure/parsers.py
rust/hg-core/src/dirstate/entry.rs
rust/hg-core/src/dirstate_tree/dirstate_map.rs
rust/hg-cpython/src/dirstate/dirstate_map.rs
rust/hg-cpython/src/dirstate/item.rs
tests/fakedirstatewritetime.py
tests/test-merge1.t
tests/test-subrepo.t
--- a/mercurial/cext/parsers.c	Mon Oct 25 11:36:22 2021 +0200
+++ b/mercurial/cext/parsers.c	Fri Nov 19 03:04:42 2021 +0100
@@ -320,21 +320,6 @@
 	return PyInt_FromLong(dirstate_item_c_v1_mtime(self));
 };
 
-static PyObject *dirstate_item_need_delay(dirstateItemObject *self,
-                                          PyObject *now)
-{
-	int now_s;
-	int now_ns;
-	if (!PyArg_ParseTuple(now, "ii", &now_s, &now_ns)) {
-		return NULL;
-	}
-	if (dirstate_item_c_v1_state(self) == 'n' && self->mtime_s == now_s) {
-		Py_RETURN_TRUE;
-	} else {
-		Py_RETURN_FALSE;
-	}
-};
-
 static PyObject *dirstate_item_mtime_likely_equal_to(dirstateItemObject *self,
                                                      PyObject *other)
 {
@@ -548,8 +533,6 @@
      "return a \"size\" suitable for v1 serialization"},
     {"v1_mtime", (PyCFunction)dirstate_item_v1_mtime, METH_NOARGS,
      "return a \"mtime\" suitable for v1 serialization"},
-    {"need_delay", (PyCFunction)dirstate_item_need_delay, METH_O,
-     "True if the stored mtime would be ambiguous with the current time"},
     {"mtime_likely_equal_to", (PyCFunction)dirstate_item_mtime_likely_equal_to,
      METH_O, "True if the stored mtime is likely equal to the given mtime"},
     {"from_v1_data", (PyCFunction)dirstate_item_from_v1_meth,
@@ -922,12 +905,9 @@
 	Py_ssize_t nbytes, pos, l;
 	PyObject *k, *v = NULL, *pn;
 	char *p, *s;
-	int now_s;
-	int now_ns;
 
-	if (!PyArg_ParseTuple(args, "O!O!O!(ii):pack_dirstate", &PyDict_Type,
-	                      &map, &PyDict_Type, &copymap, &PyTuple_Type, &pl,
-	                      &now_s, &now_ns)) {
+	if (!PyArg_ParseTuple(args, "O!O!O!:pack_dirstate", &PyDict_Type, &map,
+	                      &PyDict_Type, &copymap, &PyTuple_Type, &pl)) {
 		return NULL;
 	}
 
@@ -996,21 +976,6 @@
 		mode = dirstate_item_c_v1_mode(tuple);
 		size = dirstate_item_c_v1_size(tuple);
 		mtime = dirstate_item_c_v1_mtime(tuple);
-		if (state == 'n' && tuple->mtime_s == now_s) {
-			/* See pure/parsers.py:pack_dirstate for why we do
-			 * this. */
-			mtime = -1;
-			mtime_unset = (PyObject *)dirstate_item_from_v1_data(
-			    state, mode, size, mtime);
-			if (!mtime_unset) {
-				goto bail;
-			}
-			if (PyDict_SetItem(map, k, mtime_unset) == -1) {
-				goto bail;
-			}
-			Py_DECREF(mtime_unset);
-			mtime_unset = NULL;
-		}
 		*p++ = state;
 		putbe32((uint32_t)mode, p);
 		putbe32((uint32_t)size, p + 4);
--- a/mercurial/dirstate.py	Mon Oct 25 11:36:22 2021 +0200
+++ b/mercurial/dirstate.py	Fri Nov 19 03:04:42 2021 +0100
@@ -779,25 +779,6 @@
             # filesystem's notion of 'now'
             now = timestamp.mtime_of(util.fstat(st))
 
-        # enough 'delaywrite' prevents 'pack_dirstate' from dropping
-        # timestamp of each entries in dirstate, because of 'now > mtime'
-        delaywrite = self._ui.configint(b'debug', b'dirstate.delaywrite')
-        if delaywrite > 0:
-            # do we have any files to delay for?
-            for f, e in pycompat.iteritems(self._map):
-                if e.need_delay(now):
-                    import time  # to avoid useless import
-
-                    # rather than sleep n seconds, sleep until the next
-                    # multiple of n seconds
-                    clock = time.time()
-                    start = int(clock) - (int(clock) % delaywrite)
-                    end = start + delaywrite
-                    time.sleep(end - clock)
-                    # trust our estimate that the end is near now
-                    now = timestamp.timestamp((end, 0))
-                    break
-
         self._map.write(tr, st, now)
         self._dirty = False
 
--- a/mercurial/dirstatemap.py	Mon Oct 25 11:36:22 2021 +0200
+++ b/mercurial/dirstatemap.py	Fri Nov 19 03:04:42 2021 +0100
@@ -446,11 +446,11 @@
 
     def write(self, tr, st, now):
         if self._use_dirstate_v2:
-            packed, meta = v2.pack_dirstate(self._map, self.copymap, now)
+            packed, meta = v2.pack_dirstate(self._map, self.copymap)
             self.write_v2_no_append(tr, st, meta, packed)
         else:
             packed = parsers.pack_dirstate(
-                self._map, self.copymap, self.parents(), now
+                self._map, self.copymap, self.parents()
             )
             st.write(packed)
             st.close()
@@ -658,7 +658,7 @@
         def write(self, tr, st, now):
             if not self._use_dirstate_v2:
                 p1, p2 = self.parents()
-                packed = self._map.write_v1(p1, p2, now)
+                packed = self._map.write_v1(p1, p2)
                 st.write(packed)
                 st.close()
                 self._dirtyparents = False
@@ -666,7 +666,7 @@
 
             # We can only append to an existing data file if there is one
             can_append = self.docket.uuid is not None
-            packed, meta, append = self._map.write_v2(now, can_append)
+            packed, meta, append = self._map.write_v2(can_append)
             if append:
                 docket = self.docket
                 data_filename = docket.data_filename()
--- a/mercurial/dirstateutils/v2.py	Mon Oct 25 11:36:22 2021 +0200
+++ b/mercurial/dirstateutils/v2.py	Fri Nov 19 03:04:42 2021 +0100
@@ -174,12 +174,10 @@
         )
 
 
-def pack_dirstate(map, copy_map, now):
+def pack_dirstate(map, copy_map):
     """
     Pack `map` and `copy_map` into the dirstate v2 binary format and return
     the bytearray.
-    `now` is a timestamp of the current filesystem time used to detect race
-    conditions in writing the dirstate to disk, see inline comment.
 
     The on-disk format expects a tree-like structure where the leaves are
     written first (and sorted per-directory), going up levels until the root
@@ -284,17 +282,6 @@
     stack.append(current_node)
 
     for index, (path, entry) in enumerate(sorted_map, 1):
-        if entry.need_delay(now):
-            # The file was last modified "simultaneously" with the current
-            # write to dirstate (i.e. within the same second for file-
-            # systems with a granularity of 1 sec). This commonly happens
-            # for at least a couple of files on 'update'.
-            # The user could change the file without changing its size
-            # within the same second. Invalidate the file's mtime in
-            # dirstate, forcing future 'status' calls to compare the
-            # contents of the file if the size is the same. This prevents
-            # mistakenly treating such files as clean.
-            entry.set_possibly_dirty()
         nodes_with_entry_count += 1
         if path in copy_map:
             nodes_with_copy_source_count += 1
--- a/mercurial/pure/parsers.py	Mon Oct 25 11:36:22 2021 +0200
+++ b/mercurial/pure/parsers.py	Fri Nov 19 03:04:42 2021 +0100
@@ -536,10 +536,6 @@
         else:
             return self._mtime_s
 
-    def need_delay(self, now):
-        """True if the stored mtime would be ambiguous with the current time"""
-        return self.v1_state() == b'n' and self._mtime_s == now[0]
-
 
 def gettype(q):
     return int(q & 0xFFFF)
@@ -905,23 +901,11 @@
     return parents
 
 
-def pack_dirstate(dmap, copymap, pl, now):
+def pack_dirstate(dmap, copymap, pl):
     cs = stringio()
     write = cs.write
     write(b"".join(pl))
     for f, e in pycompat.iteritems(dmap):
-        if e.need_delay(now):
-            # The file was last modified "simultaneously" with the current
-            # write to dirstate (i.e. within the same second for file-
-            # systems with a granularity of 1 sec). This commonly happens
-            # for at least a couple of files on 'update'.
-            # The user could change the file without changing its size
-            # within the same second. Invalidate the file's mtime in
-            # dirstate, forcing future 'status' calls to compare the
-            # contents of the file if the size is the same. This prevents
-            # mistakenly treating such files as clean.
-            e.set_possibly_dirty()
-
         if f in copymap:
             f = b"%s\0%s" % (f, copymap[f])
         e = _pack(
--- a/rust/hg-core/src/dirstate/entry.rs	Mon Oct 25 11:36:22 2021 +0200
+++ b/rust/hg-core/src/dirstate/entry.rs	Fri Nov 19 03:04:42 2021 +0100
@@ -590,16 +590,6 @@
     pub fn debug_tuple(&self) -> (u8, i32, i32, i32) {
         (self.state().into(), self.mode(), self.size(), self.mtime())
     }
-
-    /// True if the stored mtime would be ambiguous with the current time
-    pub fn need_delay(&self, now: TruncatedTimestamp) -> bool {
-        if let Some(mtime) = self.mtime {
-            self.state() == EntryState::Normal
-                && mtime.truncated_seconds() == now.truncated_seconds()
-        } else {
-            false
-        }
-    }
 }
 
 impl EntryState {
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Mon Oct 25 11:36:22 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Fri Nov 19 03:04:42 2021 +0100
@@ -677,25 +677,6 @@
         })
     }
 
-    fn clear_known_ambiguous_mtimes(
-        &mut self,
-        paths: &[impl AsRef<HgPath>],
-    ) -> Result<(), DirstateV2ParseError> {
-        for path in paths {
-            if let Some(node) = Self::get_node_mut(
-                self.on_disk,
-                &mut self.unreachable_bytes,
-                &mut self.root,
-                path.as_ref(),
-            )? {
-                if let NodeData::Entry(entry) = &mut node.data {
-                    entry.set_possibly_dirty();
-                }
-            }
-        }
-        Ok(())
-    }
-
     fn count_dropped_path(unreachable_bytes: &mut u32, path: &Cow<HgPath>) {
         if let Cow::Borrowed(path) = path {
             *unreachable_bytes += path.len() as u32
@@ -930,29 +911,20 @@
     pub fn pack_v1(
         &mut self,
         parents: DirstateParents,
-        now: TruncatedTimestamp,
     ) -> Result<Vec<u8>, DirstateError> {
         let map = self.get_map_mut();
-        let mut ambiguous_mtimes = Vec::new();
         // Optizimation (to be measured?): pre-compute size to avoid `Vec`
         // reallocations
         let mut size = parents.as_bytes().len();
         for node in map.iter_nodes() {
             let node = node?;
-            if let Some(entry) = node.entry()? {
+            if node.entry()?.is_some() {
                 size += packed_entry_size(
                     node.full_path(map.on_disk)?,
                     node.copy_source(map.on_disk)?,
                 );
-                if entry.need_delay(now) {
-                    ambiguous_mtimes.push(
-                        node.full_path_borrowed(map.on_disk)?
-                            .detach_from_tree(),
-                    )
-                }
             }
         }
-        map.clear_known_ambiguous_mtimes(&ambiguous_mtimes)?;
 
         let mut packed = Vec::with_capacity(size);
         packed.extend(parents.as_bytes());
@@ -978,26 +950,9 @@
     #[timed]
     pub fn pack_v2(
         &mut self,
-        now: TruncatedTimestamp,
         can_append: bool,
     ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
         let map = self.get_map_mut();
-        let mut paths = Vec::new();
-        for node in map.iter_nodes() {
-            let node = node?;
-            if let Some(entry) = node.entry()? {
-                if entry.need_delay(now) {
-                    paths.push(
-                        node.full_path_borrowed(map.on_disk)?
-                            .detach_from_tree(),
-                    )
-                }
-            }
-        }
-        // Borrow of `self` ends here since we collect cloned paths
-
-        map.clear_known_ambiguous_mtimes(&paths)?;
-
         on_disk::write(map, can_append)
     }
 
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs	Mon Oct 25 11:36:22 2021 +0200
+++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs	Fri Nov 19 03:04:42 2021 +0100
@@ -18,7 +18,7 @@
 
 use crate::{
     dirstate::copymap::{CopyMap, CopyMapItemsIterator, CopyMapKeysIterator},
-    dirstate::item::{timestamp, DirstateItem},
+    dirstate::item::DirstateItem,
     pybytes_deref::PyBytesDeref,
 };
 use hg::{
@@ -194,16 +194,13 @@
         &self,
         p1: PyObject,
         p2: PyObject,
-        now: (u32, u32)
     ) -> PyResult<PyBytes> {
-        let now = timestamp(py, now)?;
-
         let mut inner = self.inner(py).borrow_mut();
         let parents = DirstateParents {
             p1: extract_node_id(py, &p1)?,
             p2: extract_node_id(py, &p2)?,
         };
-        let result = inner.pack_v1(parents, now);
+        let result = inner.pack_v1(parents);
         match result {
             Ok(packed) => Ok(PyBytes::new(py, &packed)),
             Err(_) => Err(PyErr::new::<exc::OSError, _>(
@@ -218,13 +215,10 @@
     /// instead of written to a new data file (False).
     def write_v2(
         &self,
-        now: (u32, u32),
         can_append: bool,
     ) -> PyResult<PyObject> {
-        let now = timestamp(py, now)?;
-
         let mut inner = self.inner(py).borrow_mut();
-        let result = inner.pack_v2(now, can_append);
+        let result = inner.pack_v2(can_append);
         match result {
             Ok((packed, tree_metadata, append)) => {
                 let packed = PyBytes::new(py, &packed);
--- a/rust/hg-cpython/src/dirstate/item.rs	Mon Oct 25 11:36:22 2021 +0200
+++ b/rust/hg-cpython/src/dirstate/item.rs	Fri Nov 19 03:04:42 2021 +0100
@@ -194,11 +194,6 @@
         Ok(mtime)
     }
 
-    def need_delay(&self, now: (u32, u32)) -> PyResult<bool> {
-        let now = timestamp(py, now)?;
-        Ok(self.entry(py).get().need_delay(now))
-    }
-
     def mtime_likely_equal_to(&self, other: (u32, u32)) -> PyResult<bool> {
         if let Some(mtime) = self.entry(py).get().truncated_mtime() {
             Ok(mtime.likely_equal(timestamp(py, other)?))
--- a/tests/fakedirstatewritetime.py	Mon Oct 25 11:36:22 2021 +0200
+++ b/tests/fakedirstatewritetime.py	Fri Nov 19 03:04:42 2021 +0100
@@ -37,14 +37,8 @@
 has_rust_dirstate = policy.importrust('dirstate') is not None
 
 
-def pack_dirstate(fakenow, orig, dmap, copymap, pl, now):
-    # execute what original parsers.pack_dirstate should do actually
-    # for consistency
-    for f, e in dmap.items():
-        if e.need_delay(now):
-            e.set_possibly_dirty()
-
-    return orig(dmap, copymap, pl, fakenow)
+def pack_dirstate(orig, dmap, copymap, pl):
+    return orig(dmap, copymap, pl)
 
 
 def fakewrite(ui, func):
@@ -67,19 +61,19 @@
         # The Rust implementation does not use public parse/pack dirstate
         # to prevent conversion round-trips
         orig_dirstatemap_write = dirstatemapmod.dirstatemap.write
-        wrapper = lambda self, tr, st, now: orig_dirstatemap_write(
-            self, tr, st, fakenow
-        )
+        wrapper = lambda self, tr, st: orig_dirstatemap_write(self, tr, st)
         dirstatemapmod.dirstatemap.write = wrapper
 
     orig_get_fs_now = timestamp.get_fs_now
-    wrapper = lambda *args: pack_dirstate(fakenow, orig_pack_dirstate, *args)
+    wrapper = lambda *args: pack_dirstate(orig_pack_dirstate, *args)
 
     orig_module = parsers
     orig_pack_dirstate = parsers.pack_dirstate
 
     orig_module.pack_dirstate = wrapper
-    timestamp.get_fs_now = lambda *args: fakenow
+    timestamp.get_fs_now = (
+        lambda *args: fakenow
+    )  # XXX useless for this purpose now
     try:
         return func()
     finally:
--- a/tests/test-merge1.t	Mon Oct 25 11:36:22 2021 +0200
+++ b/tests/test-merge1.t	Fri Nov 19 03:04:42 2021 +0100
@@ -349,6 +349,10 @@
 aren't changed), even if none of mode, size and timestamp of them
 isn't changed on the filesystem (see also issue4583).
 
+This test is now "best effort" as the mechanism to prevent such race are
+getting better, it get more complicated to test a specific scenario that would
+trigger it. If you see flakyness here, there is a race.
+
   $ cat > $TESTTMP/abort.py <<EOF
   > from __future__ import absolute_import
   > # emulate aborting before "recordupdates()". in this case, files
@@ -365,13 +369,6 @@
   >     extensions.wrapfunction(merge, "applyupdates", applyupdates)
   > EOF
 
-  $ cat >> .hg/hgrc <<EOF
-  > [fakedirstatewritetime]
-  > # emulate invoking dirstate.write() via repo.status()
-  > # at 2000-01-01 00:00
-  > fakenow = 200001010000
-  > EOF
-
 (file gotten from other revision)
 
   $ hg update -q -C 2
@@ -381,12 +378,8 @@
   $ hg update -q -C 3
   $ cat b
   This is file b1
-  $ touch -t 200001010000 b
-  $ hg debugrebuildstate
-
   $ cat >> .hg/hgrc <<EOF
   > [extensions]
-  > fakedirstatewritetime = $TESTDIR/fakedirstatewritetime.py
   > abort = $TESTTMP/abort.py
   > EOF
   $ hg merge 5
@@ -394,13 +387,11 @@
   [255]
   $ cat >> .hg/hgrc <<EOF
   > [extensions]
-  > fakedirstatewritetime = !
   > abort = !
   > EOF
 
   $ cat b
   THIS IS FILE B5
-  $ touch -t 200001010000 b
   $ hg status -A b
   M b
 
@@ -413,12 +404,10 @@
 
   $ cat b
   this is file b6
-  $ touch -t 200001010000 b
-  $ hg debugrebuildstate
+  $ hg status
 
   $ cat >> .hg/hgrc <<EOF
   > [extensions]
-  > fakedirstatewritetime = $TESTDIR/fakedirstatewritetime.py
   > abort = $TESTTMP/abort.py
   > EOF
   $ hg merge --tool internal:other 5
@@ -426,13 +415,11 @@
   [255]
   $ cat >> .hg/hgrc <<EOF
   > [extensions]
-  > fakedirstatewritetime = !
   > abort = !
   > EOF
 
   $ cat b
   THIS IS FILE B5
-  $ touch -t 200001010000 b
   $ hg status -A b
   M b
 
--- a/tests/test-subrepo.t	Mon Oct 25 11:36:22 2021 +0200
+++ b/tests/test-subrepo.t	Fri Nov 19 03:04:42 2021 +0100
@@ -1021,37 +1021,21 @@
 
 test if untracked file is not overwritten
 
-(this also tests that updated .hgsubstate is treated as "modified",
-when 'merge.update()' is aborted before 'merge.recordupdates()', even
-if none of mode, size and timestamp of it isn't changed on the
-filesystem (see also issue4583))
+(this tests also has a change to update .hgsubstate and merge it within the
+same second. It should mark is are modified , even if none of mode, size and
+timestamp of it isn't changed on the filesystem (see also issue4583))
 
   $ echo issue3276_ok > repo/s/b
   $ hg -R repo2 push -f -q
-  $ touch -t 200001010000 repo/.hgsubstate
 
-  $ cat >> repo/.hg/hgrc <<EOF
-  > [fakedirstatewritetime]
-  > # emulate invoking dirstate.write() via repo.status()
-  > # at 2000-01-01 00:00
-  > fakenow = 200001010000
-  > 
-  > [extensions]
-  > fakedirstatewritetime = $TESTDIR/fakedirstatewritetime.py
-  > EOF
   $ hg -R repo update
   b: untracked file differs
   abort: untracked files in working directory differ from files in requested revision (in subrepository "s")
   [255]
-  $ cat >> repo/.hg/hgrc <<EOF
-  > [extensions]
-  > fakedirstatewritetime = !
-  > EOF
 
   $ cat repo/s/b
   issue3276_ok
   $ rm repo/s/b
-  $ touch -t 200001010000 repo/.hgsubstate
   $ hg -R repo revert --all
   reverting repo/.hgsubstate
   reverting subrepo s