dirstate-map: move most of `dirstate.update_file` logic in the dsmap
authorPierre-Yves David <pierre-yves.david@octobus.net>
Mon, 19 Jul 2021 07:23:55 +0200
changeset 47692 e5fb14a07866
parent 47691 33beeb32f73a
child 47693 46c318b9b9a4
dirstate-map: move most of `dirstate.update_file` logic in the dsmap A new `reset_state` method is introduced to deal with most of that logic. This move things one layer lower, but the ultimate goal is to deal with most of this at the DirstateItem level. This reveal various imperfection with the data passed to update_file by `mergestate.recordupdates`, however this is orthogonal to this patch and should be dealt with at a higher level. Differential Revision: https://phab.mercurial-scm.org/D11134
mercurial/dirstate.py
mercurial/dirstatemap.py
rust/hg-core/src/dirstate/dirstate_map.rs
rust/hg-core/src/dirstate_tree/dirstate_map.rs
rust/hg-core/src/dirstate_tree/dispatch.rs
rust/hg-cpython/src/dirstate/dirstate_map.rs
rust/hg-cpython/src/dirstate/dispatch.rs
rust/hg-cpython/src/dirstate/non_normal_entries.rs
--- a/mercurial/dirstate.py	Fri Jul 16 22:30:11 2021 +0200
+++ b/mercurial/dirstate.py	Mon Jul 19 07:23:55 2021 +0200
@@ -564,30 +564,46 @@
         if merged and (clean_p1 or clean_p2):
             msg = b'`merged` argument incompatible with `clean_p1`/`clean_p2`'
             raise error.ProgrammingError(msg)
-        if not (p1_tracked or p2_tracked or wc_tracked):
-            self._drop(filename)
-        elif merged:
-            assert wc_tracked
-            assert self.in_merge  # we are never in the "normallookup" case
-            self.otherparent(filename)
-        elif not (p1_tracked or p2_tracked) and wc_tracked:
-            self._addpath(filename, added=True, possibly_dirty=possibly_dirty)
-            self._map.copymap.pop(filename, None)
-        elif (p1_tracked or p2_tracked) and not wc_tracked:
-            self._remove(filename)
-        elif clean_p2 and wc_tracked:
-            assert p2_tracked
-            self.otherparent(filename)
-        elif not p1_tracked and p2_tracked and wc_tracked:
-            self._addpath(filename, from_p2=True, possibly_dirty=possibly_dirty)
-            self._map.copymap.pop(filename, None)
-        elif possibly_dirty:
-            self._addpath(filename, possibly_dirty=possibly_dirty)
-        elif wc_tracked:
-            self.normal(filename, parentfiledata=parentfiledata)
-        # XXX We need something for file that are dirty after an update
-        else:
-            assert False, 'unreachable'
+
+        # note: I do not think we need to double check name clash here since we
+        # are in a update/merge case that should already have taken care of
+        # this. The test agrees
+
+        self._dirty = True
+        self._updatedfiles.add(filename)
+
+        need_parent_file_data = (
+            not (possibly_dirty or clean_p2 or merged)
+            and wc_tracked
+            and p1_tracked
+        )
+
+        # this mean we are doing call for file we do not really care about the
+        # data (eg: added or removed), however this should be a minor overhead
+        # compared to the overall update process calling this.
+        if need_parent_file_data:
+            if parentfiledata is None:
+                parentfiledata = self._get_filedata(filename)
+            mtime = parentfiledata[2]
+
+            if mtime > self._lastnormaltime:
+                # Remember the most recent modification timeslot for
+                # status(), to make sure we won't miss future
+                # size-preserving file content modifications that happen
+                # within the same timeslot.
+                self._lastnormaltime = mtime
+
+        self._map.reset_state(
+            filename,
+            wc_tracked,
+            p1_tracked,
+            p2_tracked=p2_tracked,
+            merged=merged,
+            clean_p1=clean_p1,
+            clean_p2=clean_p2,
+            possibly_dirty=possibly_dirty,
+            parentfiledata=parentfiledata,
+        )
 
     def _addpath(
         self,
--- a/mercurial/dirstatemap.py	Fri Jul 16 22:30:11 2021 +0200
+++ b/mercurial/dirstatemap.py	Mon Jul 19 07:23:55 2021 +0200
@@ -219,6 +219,83 @@
         if e.dm_otherparent:
             self.otherparentset.add(f)
 
+    def reset_state(
+        self,
+        filename,
+        wc_tracked,
+        p1_tracked,
+        p2_tracked=False,
+        merged=False,
+        clean_p1=False,
+        clean_p2=False,
+        possibly_dirty=False,
+        parentfiledata=None,
+    ):
+        """Set a entry to a given state, diregarding all previous state
+
+        This is to be used by the part of the dirstate API dedicated to
+        adjusting the dirstate after a update/merge.
+
+        note: calling this might result to no entry existing at all if the
+        dirstate map does not see any point at having one for this file
+        anymore.
+        """
+        if merged and (clean_p1 or clean_p2):
+            msg = b'`merged` argument incompatible with `clean_p1`/`clean_p2`'
+            raise error.ProgrammingError(msg)
+        # copy information are now outdated
+        # (maybe new information should be in directly passed to this function)
+        self.copymap.pop(filename, None)
+
+        if not (p1_tracked or p2_tracked or wc_tracked):
+            self.dropfile(filename)
+        elif merged:
+            # XXX might be merged and removed ?
+            entry = self.get(filename)
+            if entry is not None and entry.tracked:
+                # XXX mostly replicate dirstate.other parent.  We should get
+                # the higher layer to pass us more reliable data where `merged`
+                # actually mean merged. Dropping the else clause will show
+                # failure in `test-graft.t`
+                self.addfile(filename, merged=True)
+            else:
+                self.addfile(filename, from_p2=True)
+        elif not (p1_tracked or p2_tracked) and wc_tracked:
+            self.addfile(filename, added=True, possibly_dirty=possibly_dirty)
+        elif (p1_tracked or p2_tracked) and not wc_tracked:
+            # XXX might be merged and removed ?
+            old_entry = self._map.get(filename)
+            self._dirs_decr(filename, old_entry=old_entry, remove_variant=True)
+            self._map[filename] = DirstateItem(b'r', 0, 0, 0)
+            self.nonnormalset.add(filename)
+        elif clean_p2 and wc_tracked:
+            if p1_tracked or self.get(filename) is not None:
+                # XXX the `self.get` call is catching some case in
+                # `test-merge-remove.t` where the file is tracked in p1, the
+                # p1_tracked argument is False.
+                #
+                # In addition, this seems to be a case where the file is marked
+                # as merged without actually being the result of a merge
+                # action. So thing are not ideal here.
+                self.addfile(filename, merged=True)
+            else:
+                self.addfile(filename, from_p2=True)
+        elif not p1_tracked and p2_tracked and wc_tracked:
+            self.addfile(filename, from_p2=True, possibly_dirty=possibly_dirty)
+        elif possibly_dirty:
+            self.addfile(filename, possibly_dirty=possibly_dirty)
+        elif wc_tracked:
+            # this is a "normal" file
+            if parentfiledata is None:
+                msg = b'failed to pass parentfiledata for a normal file: %s'
+                msg %= filename
+                raise error.ProgrammingError(msg)
+            mode, size, mtime = parentfiledata
+            self.addfile(filename, mode=mode, size=size, mtime=mtime)
+            self.nonnormalset.discard(filename)
+        else:
+            assert False, 'unreachable'
+
     def removefile(self, f, in_merge=False):
         """
         Mark a file as removed in the dirstate.
@@ -499,6 +576,87 @@
                 possibly_dirty,
             )
 
+        def reset_state(
+            self,
+            filename,
+            wc_tracked,
+            p1_tracked,
+            p2_tracked=False,
+            merged=False,
+            clean_p1=False,
+            clean_p2=False,
+            possibly_dirty=False,
+            parentfiledata=None,
+        ):
+            """Set a entry to a given state, disregarding all previous state
+
+            This is to be used by the part of the dirstate API dedicated to
+            adjusting the dirstate after a update/merge.
+
+            note: calling this might result to no entry existing at all if the
+            dirstate map does not see any point at having one for this file
+            anymore.
+            """
+            if merged and (clean_p1 or clean_p2):
+                msg = (
+                    b'`merged` argument incompatible with `clean_p1`/`clean_p2`'
+                )
+                raise error.ProgrammingError(msg)
+            # copy information are now outdated
+            # (maybe new information should be in directly passed to this function)
+            self.copymap.pop(filename, None)
+
+            if not (p1_tracked or p2_tracked or wc_tracked):
+                self.dropfile(filename)
+            elif merged:
+                # XXX might be merged and removed ?
+                entry = self.get(filename)
+                if entry is not None and entry.tracked:
+                    # XXX mostly replicate dirstate.other parent.  We should get
+                    # the higher layer to pass us more reliable data where `merged`
+                    # actually mean merged. Dropping the else clause will show
+                    # failure in `test-graft.t`
+                    self.addfile(filename, merged=True)
+                else:
+                    self.addfile(filename, from_p2=True)
+            elif not (p1_tracked or p2_tracked) and wc_tracked:
+                self.addfile(
+                    filename, added=True, possibly_dirty=possibly_dirty
+                )
+            elif (p1_tracked or p2_tracked) and not wc_tracked:
+                # XXX might be merged and removed ?
+                self[filename] = DirstateItem(b'r', 0, 0, 0)
+                self.nonnormalset.add(filename)
+            elif clean_p2 and wc_tracked:
+                if p1_tracked or self.get(filename) is not None:
+                    # XXX the `self.get` call is catching some case in
+                    # `test-merge-remove.t` where the file is tracked in p1, the
+                    # p1_tracked argument is False.
+                    #
+                    # In addition, this seems to be a case where the file is marked
+                    # as merged without actually being the result of a merge
+                    # action. So thing are not ideal here.
+                    self.addfile(filename, merged=True)
+                else:
+                    self.addfile(filename, from_p2=True)
+            elif not p1_tracked and p2_tracked and wc_tracked:
+                self.addfile(
+                    filename, from_p2=True, possibly_dirty=possibly_dirty
+                )
+            elif possibly_dirty:
+                self.addfile(filename, possibly_dirty=possibly_dirty)
+            elif wc_tracked:
+                # this is a "normal" file
+                if parentfiledata is None:
+                    msg = b'failed to pass parentfiledata for a normal file: %s'
+                    msg %= filename
+                    raise error.ProgrammingError(msg)
+                mode, size, mtime = parentfiledata
+                self.addfile(filename, mode=mode, size=size, mtime=mtime)
+                self.nonnormalset.discard(filename)
+            else:
+                assert False, 'unreachable'
+
         def removefile(self, *args, **kwargs):
             return self._rustmap.removefile(*args, **kwargs)
 
@@ -748,3 +906,7 @@
             for name in self._rustmap.tracked_dirs():
                 f[normcase(name)] = name
             return f
+
+        def __setitem__(self, key, value):
+            assert isinstance(value, DirstateItem)
+            self._rustmap.set_v1(key, value)
--- a/rust/hg-core/src/dirstate/dirstate_map.rs	Fri Jul 16 22:30:11 2021 +0200
+++ b/rust/hg-core/src/dirstate/dirstate_map.rs	Mon Jul 19 07:23:55 2021 +0200
@@ -64,6 +64,10 @@
         self.other_parent_set = None;
     }
 
+    pub fn set_v1_inner(&mut self, filename: &HgPath, entry: DirstateEntry) {
+        self.state_map.insert(filename.to_owned(), entry);
+    }
+
     /// Add a tracked file to the dirstate
     pub fn add_file(
         &mut self,
@@ -245,10 +249,19 @@
         }
     }
 
-    pub fn non_normal_entries_remove(&mut self, key: impl AsRef<HgPath>) {
+    pub fn non_normal_entries_remove(
+        &mut self,
+        key: impl AsRef<HgPath>,
+    ) -> bool {
         self.get_non_normal_other_parent_entries()
             .0
-            .remove(key.as_ref());
+            .remove(key.as_ref())
+    }
+
+    pub fn non_normal_entries_add(&mut self, key: impl AsRef<HgPath>) {
+        self.get_non_normal_other_parent_entries()
+            .0
+            .insert(key.as_ref().into());
     }
 
     pub fn non_normal_entries_union(
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Fri Jul 16 22:30:11 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Mon Jul 19 07:23:55 2021 +0200
@@ -757,6 +757,16 @@
         self.nodes_with_copy_source_count = 0;
     }
 
+    fn set_v1(&mut self, filename: &HgPath, entry: DirstateEntry) {
+        let node =
+            self.get_or_insert(&filename).expect("no parse error in v1");
+        node.data = NodeData::Entry(entry);
+        node.children = ChildNodes::default();
+        node.copy_source = None;
+        node.descendants_with_entry_count = 0;
+        node.tracked_descendants_count = 0;
+    }
+
     fn add_file(
         &mut self,
         filename: &HgPath,
@@ -982,7 +992,16 @@
         })
     }
 
-    fn non_normal_entries_remove(&mut self, _key: &HgPath) {
+    fn non_normal_entries_remove(&mut self, key: &HgPath) -> bool {
+        // Do nothing, this `DirstateMap` does not have a separate "non normal
+        // entries" set that need to be kept up to date.
+        if let Ok(Some(v)) = self.get(key) {
+            return v.is_non_normal();
+        }
+        false
+    }
+
+    fn non_normal_entries_add(&mut self, _key: &HgPath) {
         // Do nothing, this `DirstateMap` does not have a separate "non normal
         // entries" set that need to be kept up to date
     }
--- a/rust/hg-core/src/dirstate_tree/dispatch.rs	Fri Jul 16 22:30:11 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/dispatch.rs	Mon Jul 19 07:23:55 2021 +0200
@@ -37,6 +37,8 @@
     /// Remove information about all files in this map
     fn clear(&mut self);
 
+    fn set_v1(&mut self, filename: &HgPath, entry: DirstateEntry);
+
     /// Add or change the information associated to a given file.
     ///
     /// `old_state` is the state in the entry that `get` would have returned
@@ -95,7 +97,13 @@
     /// Mark the given path as "normal" file. This is only relevant in the flat
     /// dirstate map where there is a separate `HashSet` that needs to be kept
     /// up to date.
-    fn non_normal_entries_remove(&mut self, key: &HgPath);
+    /// Returns whether the key was present in the set.
+    fn non_normal_entries_remove(&mut self, key: &HgPath) -> bool;
+
+    /// Mark the given path as "non-normal" file.
+    /// This is only relevant in the flat dirstate map where there is a
+    /// separate `HashSet` that needs to be kept up to date.
+    fn non_normal_entries_add(&mut self, key: &HgPath);
 
     /// Return an iterator of paths whose respective entry are either
     /// "non-normal" (see `non_normal_entries_contains`) or "from other
@@ -305,6 +313,14 @@
         self.clear()
     }
 
+    /// Used to set a value directory.
+    ///
+    /// XXX Is temporary during a refactor of V1 dirstate and will disappear
+    /// shortly.
+    fn set_v1(&mut self, filename: &HgPath, entry: DirstateEntry) {
+        self.set_v1_inner(&filename, entry)
+    }
+
     fn add_file(
         &mut self,
         filename: &HgPath,
@@ -346,10 +362,14 @@
         Ok(non_normal.contains(key))
     }
 
-    fn non_normal_entries_remove(&mut self, key: &HgPath) {
+    fn non_normal_entries_remove(&mut self, key: &HgPath) -> bool {
         self.non_normal_entries_remove(key)
     }
 
+    fn non_normal_entries_add(&mut self, key: &HgPath) {
+        self.non_normal_entries_add(key)
+    }
+
     fn non_normal_or_other_parent_paths(
         &mut self,
     ) -> Box<dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + '_>
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs	Fri Jul 16 22:30:11 2021 +0200
+++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs	Mon Jul 19 07:23:55 2021 +0200
@@ -117,6 +117,21 @@
         }
     }
 
+    def set_v1(&self, path: PyObject, item: PyObject) -> PyResult<PyObject> {
+        let f = path.extract::<PyBytes>(py)?;
+        let filename = HgPath::new(f.data(py));
+        let state = item.getattr(py, "state")?.extract::<PyBytes>(py)?;
+        let state = state.data(py)[0];
+        let entry = DirstateEntry {
+            state: state.try_into().expect("state is always valid"),
+            mtime: item.getattr(py, "mtime")?.extract(py)?,
+            size: item.getattr(py, "size")?.extract(py)?,
+            mode: item.getattr(py, "mode")?.extract(py)?,
+        };
+        self.inner(py).borrow_mut().set_v1(filename, entry);
+        Ok(py.None())
+    }
+
     def addfile(
         &self,
         f: PyObject,
@@ -261,6 +276,22 @@
 
     def non_normal_entries_remove(&self, key: PyObject) -> PyResult<PyObject> {
         let key = key.extract::<PyBytes>(py)?;
+        let key = key.data(py);
+        let was_present = self
+            .inner(py)
+            .borrow_mut()
+            .non_normal_entries_remove(HgPath::new(key));
+        if !was_present {
+            let msg = String::from_utf8_lossy(key);
+            Err(PyErr::new::<exc::KeyError, _>(py, msg))
+        } else {
+            Ok(py.None())
+        }
+    }
+
+    def non_normal_entries_discard(&self, key: PyObject) -> PyResult<PyObject>
+    {
+        let key = key.extract::<PyBytes>(py)?;
         self
             .inner(py)
             .borrow_mut()
@@ -268,6 +299,15 @@
         Ok(py.None())
     }
 
+    def non_normal_entries_add(&self, key: PyObject) -> PyResult<PyObject> {
+        let key = key.extract::<PyBytes>(py)?;
+        self
+            .inner(py)
+            .borrow_mut()
+            .non_normal_entries_add(HgPath::new(key.data(py)));
+        Ok(py.None())
+    }
+
     def non_normal_or_other_parent_paths(&self) -> PyResult<PyList> {
         let mut inner = self.inner(py).borrow_mut();
 
--- a/rust/hg-cpython/src/dirstate/dispatch.rs	Fri Jul 16 22:30:11 2021 +0200
+++ b/rust/hg-cpython/src/dirstate/dispatch.rs	Mon Jul 19 07:23:55 2021 +0200
@@ -20,6 +20,10 @@
         self.get_mut().clear()
     }
 
+    fn set_v1(&mut self, filename: &HgPath, entry: DirstateEntry) {
+        self.get_mut().set_v1(filename, entry)
+    }
+
     fn add_file(
         &mut self,
         filename: &HgPath,
@@ -66,10 +70,14 @@
         self.get_mut().non_normal_entries_contains(key)
     }
 
-    fn non_normal_entries_remove(&mut self, key: &HgPath) {
+    fn non_normal_entries_remove(&mut self, key: &HgPath) -> bool {
         self.get_mut().non_normal_entries_remove(key)
     }
 
+    fn non_normal_entries_add(&mut self, key: &HgPath) {
+        self.get_mut().non_normal_entries_add(key)
+    }
+
     fn non_normal_or_other_parent_paths(
         &mut self,
     ) -> Box<dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>> + '_>
--- a/rust/hg-cpython/src/dirstate/non_normal_entries.rs	Fri Jul 16 22:30:11 2021 +0200
+++ b/rust/hg-cpython/src/dirstate/non_normal_entries.rs	Mon Jul 19 07:23:55 2021 +0200
@@ -26,6 +26,12 @@
     def remove(&self, key: PyObject) -> PyResult<PyObject> {
         self.dmap(py).non_normal_entries_remove(py, key)
     }
+    def add(&self, key: PyObject) -> PyResult<PyObject> {
+        self.dmap(py).non_normal_entries_add(py, key)
+    }
+    def discard(&self, key: PyObject) -> PyResult<PyObject> {
+        self.dmap(py).non_normal_entries_discard(py, key)
+    }
     def __richcmp__(&self, other: PyObject, op: CompareOp) -> PyResult<bool> {
         match op {
             CompareOp::Eq => self.is_equal_to(py, other),