dirstate: store mtimes with nanosecond precision in memory
authorSimon Sapin <simon.sapin@octobus.net>
Mon, 18 Oct 2021 11:23:07 +0200
changeset 48260 269ff8978086
parent 48259 84f6b0c41b90
child 48261 9205d9be8b41
dirstate: store mtimes with nanosecond precision in memory Keep integer seconds since the Unix epoch, together with integer nanoseconds in the `0 <= n < 1e9` range. For now, nanoseconds are still always zero. This commit is about data structure changes. Differential Revision: https://phab.mercurial-scm.org/D11684
mercurial/cext/parsers.c
mercurial/cext/util.h
mercurial/dirstate.py
mercurial/dirstatemap.py
mercurial/dirstateutils/timestamp.py
mercurial/dirstateutils/v2.py
mercurial/merge.py
mercurial/pure/parsers.py
rust/hg-core/src/dirstate/entry.rs
rust/hg-core/src/dirstate/parsers.rs
rust/hg-core/src/dirstate/status.rs
rust/hg-core/src/dirstate_tree/dirstate_map.rs
rust/hg-core/src/dirstate_tree/on_disk.rs
rust/hg-core/src/dirstate_tree/status.rs
rust/hg-cpython/src/dirstate.rs
rust/hg-cpython/src/dirstate/dirstate_map.rs
rust/hg-cpython/src/dirstate/item.rs
rust/hg-cpython/src/dirstate/status.rs
rust/rhg/src/commands/status.rs
tests/fakedirstatewritetime.py
--- a/mercurial/cext/parsers.c	Tue Oct 19 21:03:13 2021 +0200
+++ b/mercurial/cext/parsers.c	Mon Oct 18 11:23:07 2021 +0200
@@ -57,7 +57,8 @@
 	int has_meaningful_mtime;
 	int mode;
 	int size;
-	int mtime;
+	int mtime_s;
+	int mtime_ns;
 	PyObject *parentfiledata;
 	PyObject *fallback_exec;
 	PyObject *fallback_symlink;
@@ -111,15 +112,10 @@
 	}
 
 	if (parentfiledata != Py_None) {
-		if (!PyTuple_CheckExact(parentfiledata)) {
-			PyErr_SetString(
-			    PyExc_TypeError,
-			    "parentfiledata should be a Tuple or None");
+		if (!PyArg_ParseTuple(parentfiledata, "ii(ii)", &mode, &size,
+		                      &mtime_s, &mtime_ns)) {
 			return NULL;
 		}
-		mode = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 0));
-		size = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 1));
-		mtime = (int)PyLong_AsLong(PyTuple_GetItem(parentfiledata, 2));
 	} else {
 		has_meaningful_data = 0;
 		has_meaningful_mtime = 0;
@@ -134,9 +130,11 @@
 	}
 	if (has_meaningful_mtime) {
 		t->flags |= dirstate_flag_has_file_mtime;
-		t->mtime = mtime;
+		t->mtime_s = mtime_s;
+		t->mtime_ns = mtime_ns;
 	} else {
-		t->mtime = 0;
+		t->mtime_s = 0;
+		t->mtime_ns = 0;
 	}
 	return (PyObject *)t;
 }
@@ -254,7 +252,7 @@
 	           (self->flags & dirstate_flag_p2_info)) {
 		return ambiguous_time;
 	} else {
-		return self->mtime;
+		return self->mtime_s;
 	}
 }
 
@@ -272,7 +270,8 @@
 	} else {
 		flags &= ~dirstate_flag_mode_is_symlink;
 	}
-	return Py_BuildValue("iii", flags, self->size, self->mtime);
+	return Py_BuildValue("iiii", flags, self->size, self->mtime_s,
+	                     self->mtime_ns);
 };
 
 static PyObject *dirstate_item_v1_state(dirstateItemObject *self)
@@ -297,14 +296,30 @@
 };
 
 static PyObject *dirstate_item_need_delay(dirstateItemObject *self,
-                                          PyObject *value)
+                                          PyObject *now)
 {
-	long now;
-	if (!pylong_to_long(value, &now)) {
+	int now_s;
+	int now_ns;
+	if (!PyArg_ParseTuple(now, "ii", &now_s, &now_ns)) {
 		return NULL;
 	}
-	if (dirstate_item_c_v1_state(self) == 'n' &&
-	    dirstate_item_c_v1_mtime(self) == now) {
+	if (dirstate_item_c_v1_state(self) == 'n' && self->mtime_s == now_s) {
+		Py_RETURN_TRUE;
+	} else {
+		Py_RETURN_FALSE;
+	}
+};
+
+static PyObject *dirstate_item_mtime_likely_equal_to(dirstateItemObject *self,
+                                                     PyObject *other)
+{
+	int other_s;
+	int other_ns;
+	if (!PyArg_ParseTuple(other, "ii", &other_s, &other_ns)) {
+		return NULL;
+	}
+	if ((self->flags & dirstate_flag_has_file_mtime) &&
+	    self->mtime_s == other_s && self->mtime_ns == other_ns) {
 		Py_RETURN_TRUE;
 	} else {
 		Py_RETURN_FALSE;
@@ -324,7 +339,8 @@
 	t->flags = 0;
 	t->mode = 0;
 	t->size = 0;
-	t->mtime = 0;
+	t->mtime_s = 0;
+	t->mtime_ns = 0;
 
 	if (state == 'm') {
 		t->flags = (dirstate_flag_wc_tracked |
@@ -360,7 +376,7 @@
 			            dirstate_flag_has_file_mtime);
 			t->mode = mode;
 			t->size = size;
-			t->mtime = mtime;
+			t->mtime_s = mtime;
 		}
 	} else {
 		PyErr_Format(PyExc_RuntimeError,
@@ -395,7 +411,8 @@
 	if (!t) {
 		return NULL;
 	}
-	if (!PyArg_ParseTuple(args, "iii", &t->flags, &t->size, &t->mtime)) {
+	if (!PyArg_ParseTuple(args, "iiii", &t->flags, &t->size, &t->mtime_s,
+	                      &t->mtime_ns)) {
 		return NULL;
 	}
 	if (t->flags & dirstate_flag_expected_state_is_modified) {
@@ -431,8 +448,9 @@
 static PyObject *dirstate_item_set_clean(dirstateItemObject *self,
                                          PyObject *args)
 {
-	int size, mode, mtime;
-	if (!PyArg_ParseTuple(args, "iii", &mode, &size, &mtime)) {
+	int size, mode, mtime_s, mtime_ns;
+	if (!PyArg_ParseTuple(args, "ii(ii)", &mode, &size, &mtime_s,
+	                      &mtime_ns)) {
 		return NULL;
 	}
 	self->flags = dirstate_flag_wc_tracked | dirstate_flag_p1_tracked |
@@ -440,7 +458,8 @@
 	              dirstate_flag_has_file_mtime;
 	self->mode = mode;
 	self->size = size;
-	self->mtime = mtime;
+	self->mtime_s = mtime_s;
+	self->mtime_ns = mtime_ns;
 	Py_RETURN_NONE;
 }
 
@@ -455,8 +474,9 @@
 {
 	self->flags &= ~dirstate_flag_wc_tracked;
 	self->mode = 0;
-	self->mtime = 0;
 	self->size = 0;
+	self->mtime_s = 0;
+	self->mtime_ns = 0;
 	Py_RETURN_NONE;
 }
 
@@ -467,8 +487,9 @@
 		                 dirstate_flag_has_meaningful_data |
 		                 dirstate_flag_has_file_mtime);
 		self->mode = 0;
-		self->mtime = 0;
 		self->size = 0;
+		self->mtime_s = 0;
+		self->mtime_ns = 0;
 	}
 	Py_RETURN_NONE;
 }
@@ -485,6 +506,8 @@
      "return a \"mtime\" suitable for v1 serialization"},
     {"need_delay", (PyCFunction)dirstate_item_need_delay, METH_O,
      "True if the stored mtime would be ambiguous with the current time"},
+    {"mtime_likely_equal_to", (PyCFunction)dirstate_item_mtime_likely_equal_to,
+     METH_O, "True if the stored mtime is likely equal to the given mtime"},
     {"from_v1_data", (PyCFunction)dirstate_item_from_v1_meth,
      METH_VARARGS | METH_CLASS, "build a new DirstateItem object from V1 data"},
     {"from_v2_data", (PyCFunction)dirstate_item_from_v2_meth,
@@ -855,11 +878,12 @@
 	Py_ssize_t nbytes, pos, l;
 	PyObject *k, *v = NULL, *pn;
 	char *p, *s;
-	int now;
+	int now_s;
+	int now_ns;
 
-	if (!PyArg_ParseTuple(args, "O!O!O!i:pack_dirstate", &PyDict_Type, &map,
-	                      &PyDict_Type, &copymap, &PyTuple_Type, &pl,
-	                      &now)) {
+	if (!PyArg_ParseTuple(args, "O!O!O!(ii):pack_dirstate", &PyDict_Type,
+	                      &map, &PyDict_Type, &copymap, &PyTuple_Type, &pl,
+	                      &now_s, &now_ns)) {
 		return NULL;
 	}
 
@@ -928,7 +952,7 @@
 		mode = dirstate_item_c_v1_mode(tuple);
 		size = dirstate_item_c_v1_size(tuple);
 		mtime = dirstate_item_c_v1_mtime(tuple);
-		if (state == 'n' && mtime == now) {
+		if (state == 'n' && tuple->mtime_s == now_s) {
 			/* See pure/parsers.py:pack_dirstate for why we do
 			 * this. */
 			mtime = -1;
--- a/mercurial/cext/util.h	Tue Oct 19 21:03:13 2021 +0200
+++ b/mercurial/cext/util.h	Mon Oct 18 11:23:07 2021 +0200
@@ -27,7 +27,8 @@
 	int flags;
 	int mode;
 	int size;
-	int mtime;
+	int mtime_s;
+	int mtime_ns;
 } dirstateItemObject;
 /* clang-format on */
 
--- a/mercurial/dirstate.py	Tue Oct 19 21:03:13 2021 +0200
+++ b/mercurial/dirstate.py	Mon Oct 18 11:23:07 2021 +0200
@@ -31,6 +31,10 @@
     util,
 )
 
+from .dirstateutils import (
+    timestamp,
+)
+
 from .interfaces import (
     dirstate as intdirstate,
     util as interfaceutil,
@@ -66,7 +70,7 @@
     '''Get "now" timestamp on filesystem'''
     tmpfd, tmpname = vfs.mkstemp()
     try:
-        return os.fstat(tmpfd)[stat.ST_MTIME]
+        return timestamp.mtime_of(os.fstat(tmpfd))
     finally:
         os.close(tmpfd)
         vfs.unlink(tmpname)
@@ -122,7 +126,7 @@
         # UNC path pointing to root share (issue4557)
         self._rootdir = pathutil.normasprefix(root)
         self._dirty = False
-        self._lastnormaltime = 0
+        self._lastnormaltime = timestamp.zero()
         self._ui = ui
         self._filecache = {}
         self._parentwriters = 0
@@ -440,7 +444,7 @@
         for a in ("_map", "_branch", "_ignore"):
             if a in self.__dict__:
                 delattr(self, a)
-        self._lastnormaltime = 0
+        self._lastnormaltime = timestamp.zero()
         self._dirty = False
         self._parentwriters = 0
         self._origpl = None
@@ -639,7 +643,7 @@
         s = os.lstat(self._join(filename))
         mode = s.st_mode
         size = s.st_size
-        mtime = s[stat.ST_MTIME]
+        mtime = timestamp.mtime_of(s)
         return (mode, size, mtime)
 
     def _discoverpath(self, path, normed, ignoremissing, exists, storemap):
@@ -720,7 +724,7 @@
 
     def clear(self):
         self._map.clear()
-        self._lastnormaltime = 0
+        self._lastnormaltime = timestamp.zero()
         self._dirty = True
 
     def rebuild(self, parent, allfiles, changedfiles=None):
@@ -823,7 +827,7 @@
         if now is None:
             # use the modification time of the newly created temporary file as the
             # filesystem's notion of 'now'
-            now = util.fstat(st)[stat.ST_MTIME] & _rangemask
+            now = timestamp.mtime_of(util.fstat(st))
 
         # enough 'delaywrite' prevents 'pack_dirstate' from dropping
         # timestamp of each entries in dirstate, because of 'now > mtime'
@@ -840,11 +844,12 @@
                     start = int(clock) - (int(clock) % delaywrite)
                     end = start + delaywrite
                     time.sleep(end - clock)
-                    now = end  # trust our estimate that the end is near now
+                    # trust our estimate that the end is near now
+                    now = timestamp.timestamp((end, 0))
                     break
 
         self._map.write(tr, st, now)
-        self._lastnormaltime = 0
+        self._lastnormaltime = timestamp.zero()
         self._dirty = False
 
     def _dirignore(self, f):
@@ -1377,17 +1382,9 @@
                     uadd(fn)
                 continue
 
-            # This is equivalent to 'state, mode, size, time = dmap[fn]' but not
-            # written like that for performance reasons. dmap[fn] is not a
-            # Python tuple in compiled builds. The CPython UNPACK_SEQUENCE
-            # opcode has fast paths when the value to be unpacked is a tuple or
-            # a list, but falls back to creating a full-fledged iterator in
-            # general. That is much slower than simply accessing and storing the
-            # tuple members one by one.
             t = dget(fn)
             mode = t.mode
             size = t.size
-            time = t.mtime
 
             if not st and t.tracked:
                 dadd(fn)
@@ -1412,12 +1409,9 @@
                         ladd(fn)
                     else:
                         madd(fn)
-                elif (
-                    time != st[stat.ST_MTIME]
-                    and time != st[stat.ST_MTIME] & _rangemask
-                ):
+                elif not t.mtime_likely_equal_to(timestamp.mtime_of(st)):
                     ladd(fn)
-                elif st[stat.ST_MTIME] == lastnormaltime:
+                elif timestamp.mtime_of(st) == lastnormaltime:
                     # fn may have just been marked as normal and it may have
                     # changed in the same second without changing its size.
                     # This can happen if we quickly do multiple commits.
--- a/mercurial/dirstatemap.py	Tue Oct 19 21:03:13 2021 +0200
+++ b/mercurial/dirstatemap.py	Mon Oct 18 11:23:07 2021 +0200
@@ -127,7 +127,6 @@
     def set_clean(self, filename, mode, size, mtime):
         """mark a file as back to a clean state"""
         entry = self[filename]
-        mtime = mtime & rangemask
         size = size & rangemask
         entry.set_clean(mode, size, mtime)
         self._refresh_entry(filename, entry)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/dirstateutils/timestamp.py	Mon Oct 18 11:23:07 2021 +0200
@@ -0,0 +1,53 @@
+# Copyright Mercurial Contributors
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+import stat
+
+
+rangemask = 0x7FFFFFFF
+
+
+class timestamp(tuple):
+    """
+    A Unix timestamp with nanosecond precision,
+    modulo 2**31 seconds.
+
+    A 2-tuple containing:
+
+    `truncated_seconds`: seconds since the Unix epoch,
+    truncated to its lower 31 bits
+
+    `subsecond_nanoseconds`: number of nanoseconds since `truncated_seconds`.
+    """
+
+    def __new__(cls, value):
+        truncated_seconds, subsec_nanos = value
+        value = (truncated_seconds & rangemask, subsec_nanos)
+        return super(timestamp, cls).__new__(cls, value)
+
+
+def zero():
+    """
+    Returns the `timestamp` at the Unix epoch.
+    """
+    return tuple.__new__(timestamp, (0, 0))
+
+
+def mtime_of(stat_result):
+    """
+    Takes an `os.stat_result`-like object and returns a `timestamp` object
+    for its modification time.
+    """
+    # https://docs.python.org/2/library/os.html#os.stat_float_times
+    # "For compatibility with older Python versions,
+    #  accessing stat_result as a tuple always returns integers."
+    secs = stat_result[stat.ST_MTIME]
+
+    # Sub-second precision is not read yet: nanoseconds are always zero for now
+    subsec_nanos = 0
+
+    return timestamp((secs, subsec_nanos))
--- a/mercurial/dirstateutils/v2.py	Tue Oct 19 21:03:13 2021 +0200
+++ b/mercurial/dirstateutils/v2.py	Mon Oct 18 11:23:07 2021 +0200
@@ -107,7 +107,10 @@
         # Parse child nodes of this node recursively
         parse_nodes(map, copy_map, data, children_start, children_count)
 
-        item = parsers.DirstateItem.from_v2_data(flags, size, mtime_s)
+        # Don’t yet use sub-second precision if it exists in the file,
+        # since other parts of the code still set it to zero.
+        mtime_ns = 0
+        item = parsers.DirstateItem.from_v2_data(flags, size, mtime_s, mtime_ns)
         if not item.any_tracked:
             continue
         path = slice_with_len(data, path_start, path_len)
@@ -147,8 +150,7 @@
             copy_source_start = 0
             copy_source_len = 0
         if entry is not None:
-            flags, size, mtime_s = entry.v2_data()
-            mtime_ns = 0
+            flags, size, mtime_s, mtime_ns = entry.v2_data()
         else:
             # There are no mtime-cached directories in the Python implementation
             flags = 0
@@ -249,7 +251,6 @@
     written to the docket. Again, see more details on the on-disk format in
     `mercurial/helptext/internals/dirstate-v2`.
     """
-    now = int(now)
     data = bytearray()
     root_nodes_start = 0
     root_nodes_len = 0
--- a/mercurial/merge.py	Tue Oct 19 21:03:13 2021 +0200
+++ b/mercurial/merge.py	Mon Oct 18 11:23:07 2021 +0200
@@ -9,13 +9,13 @@
 
 import collections
 import errno
-import stat
 import struct
 
 from .i18n import _
 from .node import nullrev
 from .thirdparty import attr
 from .utils import stringutil
+from .dirstateutils import timestamp
 from . import (
     copies,
     encoding,
@@ -1406,8 +1406,9 @@
             if wantfiledata:
                 s = wfctx.lstat()
                 mode = s.st_mode
-                mtime = s[stat.ST_MTIME]
-                filedata[f] = (mode, size, mtime)  # for dirstate.normal
+                mtime = timestamp.mtime_of(s)
+                # for dirstate.update_file's parentfiledata argument:
+                filedata[f] = (mode, size, mtime)
             if i == 100:
                 yield False, (i, f)
                 i = 0
--- a/mercurial/pure/parsers.py	Tue Oct 19 21:03:13 2021 +0200
+++ b/mercurial/pure/parsers.py	Mon Oct 18 11:23:07 2021 +0200
@@ -99,7 +99,8 @@
     _p2_info = attr.ib()
     _mode = attr.ib()
     _size = attr.ib()
-    _mtime = attr.ib()
+    _mtime_s = attr.ib()
+    _mtime_ns = attr.ib()
     _fallback_exec = attr.ib()
     _fallback_symlink = attr.ib()
 
@@ -123,7 +124,8 @@
 
         self._mode = None
         self._size = None
-        self._mtime = None
+        self._mtime_s = None
+        self._mtime_ns = None
         if parentfiledata is None:
             has_meaningful_mtime = False
             has_meaningful_data = False
@@ -131,10 +133,10 @@
             self._mode = parentfiledata[0]
             self._size = parentfiledata[1]
         if has_meaningful_mtime:
-            self._mtime = parentfiledata[2]
+            self._mtime_s, self._mtime_ns = parentfiledata[2]
 
     @classmethod
-    def from_v2_data(cls, flags, size, mtime):
+    def from_v2_data(cls, flags, size, mtime_s, mtime_ns):
         """Build a new DirstateItem object from V2 data"""
         has_mode_size = bool(flags & DIRSTATE_V2_HAS_MODE_AND_SIZE)
         has_meaningful_mtime = bool(flags & DIRSTATE_V2_HAS_FILE_MTIME)
@@ -170,7 +172,7 @@
             p2_info=bool(flags & DIRSTATE_V2_P2_INFO),
             has_meaningful_data=has_mode_size,
             has_meaningful_mtime=has_meaningful_mtime,
-            parentfiledata=(mode, size, mtime),
+            parentfiledata=(mode, size, (mtime_s, mtime_ns)),
             fallback_exec=fallback_exec,
             fallback_symlink=fallback_symlink,
         )
@@ -207,13 +209,13 @@
                     wc_tracked=True,
                     p1_tracked=True,
                     has_meaningful_mtime=False,
-                    parentfiledata=(mode, size, 42),
+                    parentfiledata=(mode, size, (42, 0)),
                 )
             else:
                 return cls(
                     wc_tracked=True,
                     p1_tracked=True,
-                    parentfiledata=(mode, size, mtime),
+                    parentfiledata=(mode, size, (mtime, 0)),
                 )
         else:
             raise RuntimeError(b'unknown state: %s' % state)
@@ -224,7 +226,8 @@
         This means the next status call will have to actually check its content
         to make sure it is correct.
         """
-        self._mtime = None
+        self._mtime_s = None
+        self._mtime_ns = None
 
     def set_clean(self, mode, size, mtime):
         """mark a file as "clean" cancelling potential "possibly dirty call"
@@ -238,7 +241,7 @@
         self._p1_tracked = True
         self._mode = mode
         self._size = size
-        self._mtime = mtime
+        self._mtime_s, self._mtime_ns = mtime
 
     def set_tracked(self):
         """mark a file as tracked in the working copy
@@ -250,7 +253,8 @@
         # the files as needing lookup
         #
         # Consider dropping this in the future in favor of something less broad.
-        self._mtime = None
+        self._mtime_s = None
+        self._mtime_ns = None
 
     def set_untracked(self):
         """mark a file as untracked in the working copy
@@ -260,7 +264,8 @@
         self._wc_tracked = False
         self._mode = None
         self._size = None
-        self._mtime = None
+        self._mtime_s = None
+        self._mtime_ns = None
 
     def drop_merge_data(self):
         """remove all "merge-only" from a DirstateItem
@@ -271,7 +276,8 @@
             self._p2_info = False
             self._mode = None
             self._size = None
-            self._mtime = None
+            self._mtime_s = None
+            self._mtime_ns = None
 
     @property
     def mode(self):
@@ -285,6 +291,14 @@
     def mtime(self):
         return self.v1_mtime()
 
+    def mtime_likely_equal_to(self, other_mtime):
+        self_sec = self._mtime_s
+        if self_sec is None:
+            return False
+        self_ns = self._mtime_ns
+        other_sec, other_ns = other_mtime
+        return self_sec == other_sec and self_ns == other_ns
+
     @property
     def state(self):
         """
@@ -440,7 +454,7 @@
                 flags |= DIRSTATE_V2_MODE_EXEC_PERM
             if stat.S_ISLNK(self.mode):
                 flags |= DIRSTATE_V2_MODE_IS_SYMLINK
-        if self._mtime is not None:
+        if self._mtime_s is not None:
             flags |= DIRSTATE_V2_HAS_FILE_MTIME
 
         if self._fallback_exec is not None:
@@ -456,7 +470,7 @@
         # Note: we do not need to do anything regarding
         # DIRSTATE_V2_ALL_UNKNOWN_RECORDED and DIRSTATE_V2_ALL_IGNORED_RECORDED
         # since we never set _DIRSTATE_V2_HAS_DIRCTORY_MTIME
-        return (flags, self._size or 0, self._mtime or 0)
+        return (flags, self._size or 0, self._mtime_s or 0, self._mtime_ns or 0)
 
     def v1_state(self):
         """return a "state" suitable for v1 serialization"""
@@ -504,18 +518,18 @@
             raise RuntimeError('untracked item')
         elif self.removed:
             return 0
-        elif self._mtime is None:
+        elif self._mtime_s is None:
             return AMBIGUOUS_TIME
         elif self._p2_info:
             return AMBIGUOUS_TIME
         elif not self._p1_tracked:
             return AMBIGUOUS_TIME
         else:
-            return self._mtime
+            return self._mtime_s
 
     def need_delay(self, now):
         """True if the stored mtime would be ambiguous with the current time"""
-        return self.v1_state() == b'n' and self.v1_mtime() == now
+        return self.v1_state() == b'n' and self._mtime_s == now[0]
 
 
 def gettype(q):
@@ -883,7 +897,6 @@
 
 
 def pack_dirstate(dmap, copymap, pl, now):
-    now = int(now)
     cs = stringio()
     write = cs.write
     write(b"".join(pl))
--- a/rust/hg-core/src/dirstate/entry.rs	Tue Oct 19 21:03:13 2021 +0200
+++ b/rust/hg-core/src/dirstate/entry.rs	Mon Oct 18 11:23:07 2021 +0200
@@ -14,14 +14,15 @@
     Merged,
 }
 
-/// The C implementation uses all signed types. This will be an issue
-/// either when 4GB+ source files are commonplace or in 2038, whichever
-/// comes first.
-#[derive(Debug, PartialEq, Copy, Clone)]
+/// `size` and `mtime.seconds` are truncated to 31 bits.
+///
+/// TODO: double-check status algorithm correctness for files
+/// larger than 2 GiB or modified after 2038.
+#[derive(Debug, Copy, Clone)]
 pub struct DirstateEntry {
     pub(crate) flags: Flags,
     mode_size: Option<(u32, u32)>,
-    mtime: Option<u32>,
+    mtime: Option<TruncatedTimestamp>,
 }
 
 bitflags! {
@@ -37,7 +38,7 @@
 }
 
 /// A Unix timestamp with nanoseconds precision
-#[derive(Copy, Clone)]
+#[derive(Debug, Copy, Clone)]
 pub struct TruncatedTimestamp {
     truncated_seconds: u32,
     /// Always in the `0 .. 1_000_000_000` range.
@@ -90,6 +91,11 @@
         }
     }
 
+    pub fn to_integer_second(mut self) -> Self {
+        self.nanoseconds = 0;
+        self
+    }
+
     /// The lower 31 bits of the number of seconds since the epoch.
     pub fn truncated_seconds(&self) -> u32 {
         self.truncated_seconds
@@ -182,7 +188,7 @@
         p1_tracked: bool,
         p2_info: bool,
         mode_size: Option<(u32, u32)>,
-        mtime: Option<u32>,
+        mtime: Option<TruncatedTimestamp>,
         fallback_exec: Option<bool>,
         fallback_symlink: Option<bool>,
     ) -> Self {
@@ -191,9 +197,6 @@
             assert!(mode & !RANGE_MASK_31BIT == 0);
             assert!(size & !RANGE_MASK_31BIT == 0);
         }
-        if let Some(mtime) = mtime {
-            assert!(mtime & !RANGE_MASK_31BIT == 0);
-        }
         let mut flags = Flags::empty();
         flags.set(Flags::WDIR_TRACKED, wdir_tracked);
         flags.set(Flags::P1_TRACKED, p1_tracked);
@@ -252,6 +255,9 @@
                     let mode = u32::try_from(mode).unwrap();
                     let size = u32::try_from(size).unwrap();
                     let mtime = u32::try_from(mtime).unwrap();
+                    let mtime =
+                        TruncatedTimestamp::from_already_truncated(mtime, 0)
+                            .unwrap();
                     Self {
                         flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED,
                         mode_size: Some((mode, size)),
@@ -344,7 +350,7 @@
         bool,
         bool,
         Option<(u32, u32)>,
-        Option<u32>,
+        Option<TruncatedTimestamp>,
         Option<bool>,
         Option<bool>,
     ) {
@@ -429,7 +435,7 @@
         } else if !self.flags.contains(Flags::P1_TRACKED) {
             MTIME_UNSET
         } else if let Some(mtime) = self.mtime {
-            i32::try_from(mtime).unwrap()
+            i32::try_from(mtime.truncated_seconds()).unwrap()
         } else {
             MTIME_UNSET
         }
@@ -501,6 +507,10 @@
         }
     }
 
+    pub fn truncated_mtime(&self) -> Option<TruncatedTimestamp> {
+        self.mtime
+    }
+
     pub fn drop_merge_data(&mut self) {
         if self.flags.contains(Flags::P2_INFO) {
             self.flags.remove(Flags::P2_INFO);
@@ -513,9 +523,13 @@
         self.mtime = None
     }
 
-    pub fn set_clean(&mut self, mode: u32, size: u32, mtime: u32) {
+    pub fn set_clean(
+        &mut self,
+        mode: u32,
+        size: u32,
+        mtime: TruncatedTimestamp,
+    ) {
         let size = size & RANGE_MASK_31BIT;
-        let mtime = mtime & RANGE_MASK_31BIT;
         self.flags.insert(Flags::WDIR_TRACKED | Flags::P1_TRACKED);
         self.mode_size = Some((mode, size));
         self.mtime = Some(mtime);
@@ -577,8 +591,13 @@
     }
 
     /// True if the stored mtime would be ambiguous with the current time
-    pub fn need_delay(&self, now: i32) -> bool {
-        self.state() == EntryState::Normal && self.mtime() == now
+    pub fn need_delay(&self, now: TruncatedTimestamp) -> bool {
+        if let Some(mtime) = self.mtime {
+            self.state() == EntryState::Normal
+                && mtime.truncated_seconds() == now.truncated_seconds()
+        } else {
+            false
+        }
     }
 }
 
--- a/rust/hg-core/src/dirstate/parsers.rs	Tue Oct 19 21:03:13 2021 +0200
+++ b/rust/hg-core/src/dirstate/parsers.rs	Mon Oct 18 11:23:07 2021 +0200
@@ -135,6 +135,3 @@
         packed.extend(source.as_bytes());
     }
 }
-
-/// Seconds since the Unix epoch
-pub struct Timestamp(pub i64);
--- a/rust/hg-core/src/dirstate/status.rs	Tue Oct 19 21:03:13 2021 +0200
+++ b/rust/hg-core/src/dirstate/status.rs	Mon Oct 18 11:23:07 2021 +0200
@@ -12,6 +12,7 @@
 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
 
 use crate::{
+    dirstate::TruncatedTimestamp,
     utils::hg_path::{HgPath, HgPathError},
     PatternError,
 };
@@ -64,7 +65,7 @@
     /// Remember the most recent modification timeslot for status, to make
     /// sure we won't miss future size-preserving file content modifications
     /// that happen within the same timeslot.
-    pub last_normal_time: i64,
+    pub last_normal_time: TruncatedTimestamp,
     /// Whether we are on a filesystem with UNIX-like exec flags
     pub check_exec: bool,
     pub list_clean: bool,
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Tue Oct 19 21:03:13 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Mon Oct 18 11:23:07 2021 +0200
@@ -1,7 +1,6 @@
 use bytes_cast::BytesCast;
 use micro_timer::timed;
 use std::borrow::Cow;
-use std::convert::TryInto;
 use std::path::PathBuf;
 
 use super::on_disk;
@@ -11,7 +10,6 @@
 use crate::dirstate::parsers::pack_entry;
 use crate::dirstate::parsers::packed_entry_size;
 use crate::dirstate::parsers::parse_dirstate_entries;
-use crate::dirstate::parsers::Timestamp;
 use crate::dirstate::CopyMapIter;
 use crate::dirstate::StateMapIter;
 use crate::dirstate::TruncatedTimestamp;
@@ -932,10 +930,9 @@
     pub fn pack_v1(
         &mut self,
         parents: DirstateParents,
-        now: Timestamp,
+        now: TruncatedTimestamp,
     ) -> Result<Vec<u8>, DirstateError> {
         let map = self.get_map_mut();
-        let now: i32 = now.0.try_into().expect("time overflow");
         let mut ambiguous_mtimes = Vec::new();
         // Optizimation (to be measured?): pre-compute size to avoid `Vec`
         // reallocations
@@ -981,12 +978,10 @@
     #[timed]
     pub fn pack_v2(
         &mut self,
-        now: Timestamp,
+        now: TruncatedTimestamp,
         can_append: bool,
     ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
         let map = self.get_map_mut();
-        // TODO: how do we want to handle this in 2038?
-        let now: i32 = now.0.try_into().expect("time overflow");
         let mut paths = Vec::new();
         for node in map.iter_nodes() {
             let node = node?;
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs	Tue Oct 19 21:03:13 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs	Mon Oct 18 11:23:07 2021 +0200
@@ -317,7 +317,7 @@
         &self,
     ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
         if self.has_entry() {
-            Ok(dirstate_map::NodeData::Entry(self.assume_entry()))
+            Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
         } else if let Some(mtime) = self.cached_directory_mtime()? {
             Ok(dirstate_map::NodeData::CachedDirectory { mtime })
         } else {
@@ -357,7 +357,7 @@
         file_type | permisions
     }
 
-    fn assume_entry(&self) -> DirstateEntry {
+    fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
         // TODO: convert through raw bits instead?
         let wdir_tracked = self.flags().contains(Flags::WDIR_TRACKED);
         let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
@@ -372,11 +372,19 @@
         let mtime = if self.flags().contains(Flags::HAS_FILE_MTIME)
             && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
         {
-            Some(self.mtime.truncated_seconds.into())
+            // TODO: replace this by `self.mtime.try_into()?` to use
+            // sub-second precision from the file.
+            // We don’t do this yet because other parts of the code
+            // always set it to zero.
+            let mtime = TruncatedTimestamp::from_already_truncated(
+                self.mtime.truncated_seconds.get(),
+                0,
+            )?;
+            Some(mtime)
         } else {
             None
         };
-        DirstateEntry::from_v2_data(
+        Ok(DirstateEntry::from_v2_data(
             wdir_tracked,
             p1_tracked,
             p2_info,
@@ -384,14 +392,14 @@
             mtime,
             None,
             None,
-        )
+        ))
     }
 
     pub(super) fn entry(
         &self,
     ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
         if self.has_entry() {
-            Ok(Some(self.assume_entry()))
+            Ok(Some(self.assume_entry()?))
         } else {
             Ok(None)
         }
@@ -450,10 +458,7 @@
         };
         let mtime = if let Some(m) = mtime_opt {
             flags.insert(Flags::HAS_FILE_MTIME);
-            PackedTruncatedTimestamp {
-                truncated_seconds: m.into(),
-                nanoseconds: 0.into(),
-            }
+            m.into()
         } else {
             PackedTruncatedTimestamp::null()
         };
--- a/rust/hg-core/src/dirstate_tree/status.rs	Tue Oct 19 21:03:13 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/status.rs	Mon Oct 18 11:23:07 2021 +0200
@@ -501,9 +501,6 @@
         fn truncate_u64(value: u64) -> i32 {
             (value & 0x7FFF_FFFF) as i32
         }
-        fn truncate_i64(value: i64) -> i32 {
-            (value & 0x7FFF_FFFF) as i32
-        }
 
         let entry = dirstate_node
             .entry()?
@@ -531,10 +528,19 @@
                 .modified
                 .push(hg_path.detach_from_tree())
         } else {
-            let mtime = mtime_seconds(fs_metadata);
-            if truncate_i64(mtime) != entry.mtime()
-                || mtime == self.options.last_normal_time
-            {
+            let mtime_looks_clean;
+            if let Some(dirstate_mtime) = entry.truncated_mtime() {
+                let fs_mtime = TruncatedTimestamp::for_mtime_of(fs_metadata)
+                    .expect("OS/libc does not support mtime?")
+                    // For now don’t use sub-second precision for file mtimes
+                    .to_integer_second();
+                mtime_looks_clean = fs_mtime.likely_equal(dirstate_mtime)
+                    && !fs_mtime.likely_equal(self.options.last_normal_time)
+            } else {
+                // No mtime in the dirstate entry
+                mtime_looks_clean = false
+            };
+            if !mtime_looks_clean {
                 self.outcome
                     .lock()
                     .unwrap()
@@ -690,15 +696,6 @@
     }
 }
 
-#[cfg(unix)] // TODO
-fn mtime_seconds(metadata: &std::fs::Metadata) -> i64 {
-    // Going through `Metadata::modified()` would be portable, but would take
-    // care to construct a `SystemTime` value with sub-second precision just
-    // for us to throw that away here.
-    use std::os::unix::fs::MetadataExt;
-    metadata.mtime()
-}
-
 struct DirEntry {
     base_name: HgPathBuf,
     full_path: PathBuf,
--- a/rust/hg-cpython/src/dirstate.rs	Tue Oct 19 21:03:13 2021 +0200
+++ b/rust/hg-cpython/src/dirstate.rs	Mon Oct 18 11:23:07 2021 +0200
@@ -54,7 +54,7 @@
                 matcher: PyObject,
                 ignorefiles: PyList,
                 check_exec: bool,
-                last_normal_time: i64,
+                last_normal_time: (u32, u32),
                 list_clean: bool,
                 list_ignored: bool,
                 list_unknown: bool,
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs	Tue Oct 19 21:03:13 2021 +0200
+++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs	Mon Oct 18 11:23:07 2021 +0200
@@ -18,11 +18,10 @@
 
 use crate::{
     dirstate::copymap::{CopyMap, CopyMapItemsIterator, CopyMapKeysIterator},
-    dirstate::item::DirstateItem,
+    dirstate::item::{timestamp, DirstateItem},
     pybytes_deref::PyBytesDeref,
 };
 use hg::{
-    dirstate::parsers::Timestamp,
     dirstate::StateMapIter,
     dirstate_tree::dirstate_map::DirstateMap as TreeDirstateMap,
     dirstate_tree::on_disk::DirstateV2ParseError,
@@ -195,9 +194,9 @@
         &self,
         p1: PyObject,
         p2: PyObject,
-        now: PyObject
+        now: (u32, u32)
     ) -> PyResult<PyBytes> {
-        let now = Timestamp(now.extract(py)?);
+        let now = timestamp(py, now)?;
 
         let mut inner = self.inner(py).borrow_mut();
         let parents = DirstateParents {
@@ -219,10 +218,10 @@
     /// instead of written to a new data file (False).
     def write_v2(
         &self,
-        now: PyObject,
+        now: (u32, u32),
         can_append: bool,
     ) -> PyResult<PyObject> {
-        let now = Timestamp(now.extract(py)?);
+        let now = timestamp(py, now)?;
 
         let mut inner = self.inner(py).borrow_mut();
         let result = inner.pack_v2(now, can_append);
--- a/rust/hg-cpython/src/dirstate/item.rs	Tue Oct 19 21:03:13 2021 +0200
+++ b/rust/hg-cpython/src/dirstate/item.rs	Mon Oct 18 11:23:07 2021 +0200
@@ -9,6 +9,7 @@
 use cpython::PythonObject;
 use hg::dirstate::DirstateEntry;
 use hg::dirstate::EntryState;
+use hg::dirstate::TruncatedTimestamp;
 use std::cell::Cell;
 use std::convert::TryFrom;
 
@@ -22,7 +23,7 @@
         p2_info: bool = false,
         has_meaningful_data: bool = true,
         has_meaningful_mtime: bool = true,
-        parentfiledata: Option<(u32, u32, u32)> = None,
+        parentfiledata: Option<(u32, u32, (u32, u32))> = None,
         fallback_exec: Option<bool> = None,
         fallback_symlink: Option<bool> = None,
 
@@ -34,7 +35,7 @@
                 mode_size_opt = Some((mode, size))
             }
             if has_meaningful_mtime {
-                mtime_opt = Some(mtime)
+                mtime_opt = Some(timestamp(py, mtime)?)
             }
         }
         let entry = DirstateEntry::from_v2_data(
@@ -191,10 +192,19 @@
         Ok(mtime)
     }
 
-    def need_delay(&self, now: i32) -> PyResult<bool> {
+    def need_delay(&self, now: (u32, u32)) -> PyResult<bool> {
+        let now = timestamp(py, now)?;
         Ok(self.entry(py).get().need_delay(now))
     }
 
+    def mtime_likely_equal_to(&self, other: (u32, u32)) -> PyResult<bool> {
+        if let Some(mtime) = self.entry(py).get().truncated_mtime() {
+            Ok(mtime.likely_equal(timestamp(py, other)?))
+        } else {
+            Ok(false)
+        }
+    }
+
     @classmethod
     def from_v1_data(
         _cls,
@@ -220,8 +230,9 @@
         &self,
         mode: u32,
         size: u32,
-        mtime: u32,
+        mtime: (u32, u32),
     ) -> PyResult<PyNone> {
+        let mtime = timestamp(py, mtime)?;
         self.update(py, |entry| entry.set_clean(mode, size, mtime));
         Ok(PyNone)
     }
@@ -261,3 +272,15 @@
         self.entry(py).set(entry)
     }
 }
+
+pub(crate) fn timestamp(
+    py: Python<'_>,
+    (s, ns): (u32, u32),
+) -> PyResult<TruncatedTimestamp> {
+    TruncatedTimestamp::from_already_truncated(s, ns).map_err(|_| {
+        PyErr::new::<exc::ValueError, _>(
+            py,
+            "expected mtime truncated to 31 bits",
+        )
+    })
+}
--- a/rust/hg-cpython/src/dirstate/status.rs	Tue Oct 19 21:03:13 2021 +0200
+++ b/rust/hg-cpython/src/dirstate/status.rs	Mon Oct 18 11:23:07 2021 +0200
@@ -9,6 +9,7 @@
 //! `hg-core` crate. From Python, this will be seen as
 //! `rustext.dirstate.status`.
 
+use crate::dirstate::item::timestamp;
 use crate::{dirstate::DirstateMap, exceptions::FallbackError};
 use cpython::exc::OSError;
 use cpython::{
@@ -102,12 +103,13 @@
     root_dir: PyObject,
     ignore_files: PyList,
     check_exec: bool,
-    last_normal_time: i64,
+    last_normal_time: (u32, u32),
     list_clean: bool,
     list_ignored: bool,
     list_unknown: bool,
     collect_traversed_dirs: bool,
 ) -> PyResult<PyTuple> {
+    let last_normal_time = timestamp(py, last_normal_time)?;
     let bytes = root_dir.extract::<PyBytes>(py)?;
     let root_dir = get_path_from_bytes(bytes.data(py));
 
--- a/rust/rhg/src/commands/status.rs	Tue Oct 19 21:03:13 2021 +0200
+++ b/rust/rhg/src/commands/status.rs	Mon Oct 18 11:23:07 2021 +0200
@@ -11,6 +11,7 @@
 use clap::{Arg, SubCommand};
 use hg;
 use hg::config::Config;
+use hg::dirstate::TruncatedTimestamp;
 use hg::errors::HgError;
 use hg::manifest::Manifest;
 use hg::matchers::AlwaysMatcher;
@@ -180,7 +181,7 @@
         // hence be stored on dmap. Using a value that assumes we aren't
         // below the time resolution granularity of the FS and the
         // dirstate.
-        last_normal_time: 0,
+        last_normal_time: TruncatedTimestamp::new_truncate(0, 0),
         // we're currently supporting file systems with exec flags only
         // anyway
         check_exec: true,
--- a/tests/fakedirstatewritetime.py	Tue Oct 19 21:03:13 2021 +0200
+++ b/tests/fakedirstatewritetime.py	Mon Oct 18 11:23:07 2021 +0200
@@ -15,6 +15,7 @@
     policy,
     registrar,
 )
+from mercurial.dirstateutils import timestamp
 from mercurial.utils import dateutil
 
 try:
@@ -40,9 +41,8 @@
 def pack_dirstate(fakenow, orig, dmap, copymap, pl, now):
     # execute what original parsers.pack_dirstate should do actually
     # for consistency
-    actualnow = int(now)
     for f, e in dmap.items():
-        if e.need_delay(actualnow):
+        if e.need_delay(now):
             e.set_possibly_dirty()
 
     return orig(dmap, copymap, pl, fakenow)
@@ -62,6 +62,7 @@
     # parsing 'fakenow' in YYYYmmddHHMM format makes comparison between
     # 'fakenow' value and 'touch -t YYYYmmddHHMM' argument easy
     fakenow = dateutil.parsedate(fakenow, [b'%Y%m%d%H%M'])[0]
+    fakenow = timestamp.timestamp((fakenow, 0))
 
     if has_rust_dirstate:
         # The Rust implementation does not use public parse/pack dirstate