dirstate-v2: Separate iterators for dirfoldmap and debugdirstate
authorSimon Sapin <simon.sapin@octobus.net>
Fri, 16 Jul 2021 14:08:26 +0200
changeset 47683 284a20269a97
parent 47682 78f7f0d490ee
child 47684 852262e2e7d9
dirstate-v2: Separate iterators for dirfoldmap and debugdirstate `dirstatemap.dirfoldmap` was recently changed to re-use a Rust iterator that was added for the `hg debugdirstate` command. That iterator was based on all nodes in the tree dirstate without an entry only existing to hold child nodes, and therefore being directories. However to optimize status further we may want to store additional nodes for unknown or ignored files and directories. At that point the two users of this iterator will want different things, so let’s make two iterators instead. See doc-comments in `dispatch.rs`. Differential Revision: https://phab.mercurial-scm.org/D11099
mercurial/debugcommands.py
mercurial/dirstatemap.py
rust/hg-core/src/dirstate.rs
rust/hg-core/src/dirstate_tree/dirstate_map.rs
rust/hg-core/src/dirstate_tree/dispatch.rs
rust/hg-cpython/src/dirstate.rs
rust/hg-cpython/src/dirstate/dirstate_map.rs
rust/hg-cpython/src/dirstate/dispatch.rs
tests/test-completion.t
tests/test-status.t
--- a/mercurial/debugcommands.py	Thu Jul 15 23:02:17 2021 +0200
+++ b/mercurial/debugcommands.py	Fri Jul 16 14:08:26 2021 +0200
@@ -942,7 +942,12 @@
         ),
         (b'', b'dates', True, _(b'display the saved mtime')),
         (b'', b'datesort', None, _(b'sort by saved mtime')),
-        (b'', b'dirs', False, _(b'display directories')),
+        (
+            b'',
+            b'all',
+            False,
+            _(b'display dirstate-v2 tree nodes that would not exist in v1'),
+        ),
     ],
     _(b'[OPTION]...'),
 )
@@ -961,9 +966,10 @@
         )  # sort by mtime, then by filename
     else:
         keyfunc = None  # sort by filename
-    entries = list(pycompat.iteritems(repo.dirstate))
-    if opts['dirs']:
-        entries.extend(repo.dirstate.directories())
+    if opts['all']:
+        entries = list(repo.dirstate._map.debug_iter())
+    else:
+        entries = list(pycompat.iteritems(repo.dirstate))
     entries.sort(key=keyfunc)
     for file_, ent in entries:
         if ent.v1_mtime() == -1:
--- a/mercurial/dirstatemap.py	Thu Jul 15 23:02:17 2021 +0200
+++ b/mercurial/dirstatemap.py	Fri Jul 16 14:08:26 2021 +0200
@@ -105,10 +105,6 @@
         self._map
         return self.copymap
 
-    def directories(self):
-        # Rust / dirstate-v2 only
-        return []
-
     def clear(self):
         self._map.clear()
         self.copymap.clear()
@@ -126,6 +122,8 @@
     # forward for python2,3 compat
     iteritems = items
 
+    debug_iter = items
+
     def __len__(self):
         return len(self._map)
 
@@ -525,6 +523,9 @@
         def directories(self):
             return self._rustmap.directories()
 
+        def debug_iter(self):
+            return self._rustmap.debug_iter()
+
         def preload(self):
             self._rustmap
 
@@ -746,6 +747,6 @@
         def dirfoldmap(self):
             f = {}
             normcase = util.normcase
-            for name, _pseudo_entry in self.directories():
+            for name in self._rustmap.tracked_dirs():
                 f[normcase(name)] = name
             return f
--- a/rust/hg-core/src/dirstate.rs	Thu Jul 15 23:02:17 2021 +0200
+++ b/rust/hg-core/src/dirstate.rs	Fri Jul 16 14:08:26 2021 +0200
@@ -65,6 +65,12 @@
         let fs_exec_bit = filesystem_metadata.mode() & EXEC_BIT_MASK;
         dirstate_exec_bit != fs_exec_bit
     }
+
+    /// Returns a `(state, mode, size, mtime)` tuple as for
+    /// `DirstateMapMethods::debug_iter`.
+    pub fn debug_tuple(&self) -> (u8, i32, i32, i32) {
+        (self.state.into(), self.mode, self.size, self.mtime)
+    }
 }
 
 #[derive(BytesCast)]
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Thu Jul 15 23:02:17 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Fri Jul 16 14:08:26 2021 +0200
@@ -1246,27 +1246,50 @@
         }))
     }
 
-    fn iter_directories(
+    fn iter_tracked_dirs(
+        &mut self,
+    ) -> Result<
+        Box<
+            dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
+                + Send
+                + '_,
+        >,
+        DirstateError,
+    > {
+        let on_disk = self.on_disk;
+        Ok(Box::new(filter_map_results(
+            self.iter_nodes(),
+            move |node| {
+                Ok(if node.tracked_descendants_count() > 0 {
+                    Some(node.full_path(on_disk)?)
+                } else {
+                    None
+                })
+            },
+        )))
+    }
+
+    fn debug_iter(
         &self,
     ) -> Box<
         dyn Iterator<
                 Item = Result<
-                    (&HgPath, Option<Timestamp>),
+                    (&HgPath, (u8, i32, i32, i32)),
                     DirstateV2ParseError,
                 >,
             > + Send
             + '_,
     > {
-        Box::new(filter_map_results(self.iter_nodes(), move |node| {
-            Ok(if node.state()?.is_none() {
-                Some((
-                    node.full_path(self.on_disk)?,
-                    node.cached_directory_mtime()
-                        .map(|mtime| Timestamp(mtime.seconds())),
-                ))
+        Box::new(self.iter_nodes().map(move |node| {
+            let node = node?;
+            let debug_tuple = if let Some(entry) = node.entry()? {
+                entry.debug_tuple()
+            } else if let Some(mtime) = node.cached_directory_mtime() {
+                (b' ', 0, -1, mtime.seconds() as i32)
             } else {
-                None
-            })
+                (b' ', 0, -1, -1)
+            };
+            Ok((node.full_path(self.on_disk)?, debug_tuple))
         }))
     }
 }
--- a/rust/hg-core/src/dirstate_tree/dispatch.rs	Thu Jul 15 23:02:17 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/dispatch.rs	Fri Jul 16 14:08:26 2021 +0200
@@ -259,20 +259,40 @@
     /// are `Result`s.
     fn iter(&self) -> StateMapIter<'_>;
 
-    /// In the tree dirstate, return an iterator of "directory" (entry-less)
-    /// nodes with the data stored for them. This is for `hg debugdirstate
-    /// --dirs`.
+    /// Returns an iterator of tracked directories.
     ///
-    /// In the flat dirstate, returns an empty iterator.
+    /// This is the paths for which `has_tracked_dir` would return true.
+    /// Or, in other words, the union of ancestor paths of all paths that have
+    /// an associated entry in a "tracked" state in this dirstate map.
     ///
     /// Because parse errors can happen during iteration, the iterated items
     /// are `Result`s.
-    fn iter_directories(
+    fn iter_tracked_dirs(
+        &mut self,
+    ) -> Result<
+        Box<
+            dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
+                + Send
+                + '_,
+        >,
+        DirstateError,
+    >;
+
+    /// Return an iterator of `(path, (state, mode, size, mtime))` for every
+    /// node stored in this dirstate map, for the purpose of the `hg
+    /// debugdirstate` command.
+    ///
+    /// For nodes that don’t have an entry, `state` is the ASCII space.
+    /// An `mtime` may still be present. It is used to optimize `status`.
+    ///
+    /// Because parse errors can happen during iteration, the iterated items
+    /// are `Result`s.
+    fn debug_iter(
         &self,
     ) -> Box<
         dyn Iterator<
                 Item = Result<
-                    (&HgPath, Option<Timestamp>),
+                    (&HgPath, (u8, i32, i32, i32)),
                     DirstateV2ParseError,
                 >,
             > + Send
@@ -476,17 +496,41 @@
         Box::new((&**self).iter().map(|(key, value)| Ok((&**key, *value))))
     }
 
-    fn iter_directories(
+    fn iter_tracked_dirs(
+        &mut self,
+    ) -> Result<
+        Box<
+            dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
+                + Send
+                + '_,
+        >,
+        DirstateError,
+    > {
+        self.set_all_dirs()?;
+        Ok(Box::new(
+            self.all_dirs
+                .as_ref()
+                .unwrap()
+                .iter()
+                .map(|path| Ok(&**path)),
+        ))
+    }
+
+    fn debug_iter(
         &self,
     ) -> Box<
         dyn Iterator<
                 Item = Result<
-                    (&HgPath, Option<Timestamp>),
+                    (&HgPath, (u8, i32, i32, i32)),
                     DirstateV2ParseError,
                 >,
             > + Send
             + '_,
     > {
-        Box::new(std::iter::empty())
+        Box::new(
+            (&**self)
+                .iter()
+                .map(|(path, entry)| Ok((&**path, entry.debug_tuple()))),
+        )
     }
 }
--- a/rust/hg-cpython/src/dirstate.rs	Thu Jul 15 23:02:17 2021 +0200
+++ b/rust/hg-cpython/src/dirstate.rs	Fri Jul 16 14:08:26 2021 +0200
@@ -52,9 +52,6 @@
     py: Python,
     entry: &DirstateEntry,
 ) -> PyResult<PyObject> {
-    // might be silly to retrieve capsule function in hot loop
-    let make = make_dirstate_item_capi::retrieve(py)?;
-
     let &DirstateEntry {
         state,
         mode,
@@ -65,22 +62,19 @@
     // because Into<u8> has a specific implementation while `as c_char` would
     // just do a naive enum cast.
     let state_code: u8 = state.into();
-
-    let maybe_obj = unsafe {
-        let ptr = make(state_code as c_char, mode, size, mtime);
-        PyObject::from_owned_ptr_opt(py, ptr)
-    };
-    maybe_obj.ok_or_else(|| PyErr::fetch(py))
+    make_dirstate_item_raw(py, state_code, mode, size, mtime)
 }
 
-// XXX a bit strange to have a dedicated function, but directory are not
-// treated as dirstate node by hg-core for now so…
-pub fn make_directory_item(py: Python, mtime: i32) -> PyResult<PyObject> {
-    // might be silly to retrieve capsule function in hot loop
+pub fn make_dirstate_item_raw(
+    py: Python,
+    state: u8,
+    mode: i32,
+    size: i32,
+    mtime: i32,
+) -> PyResult<PyObject> {
     let make = make_dirstate_item_capi::retrieve(py)?;
-
     let maybe_obj = unsafe {
-        let ptr = make(b'd' as c_char, 0 as i32, 0 as i32, mtime);
+        let ptr = make(state as c_char, mode, size, mtime);
         PyObject::from_owned_ptr_opt(py, ptr)
     };
     maybe_obj.ok_or_else(|| PyErr::fetch(py))
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs	Thu Jul 15 23:02:17 2021 +0200
+++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs	Fri Jul 16 14:08:26 2021 +0200
@@ -19,8 +19,8 @@
 
 use crate::{
     dirstate::copymap::{CopyMap, CopyMapItemsIterator, CopyMapKeysIterator},
-    dirstate::make_directory_item,
     dirstate::make_dirstate_item,
+    dirstate::make_dirstate_item_raw,
     dirstate::non_normal_entries::{
         NonNormalEntries, NonNormalEntriesIterator,
     },
@@ -61,17 +61,14 @@
         use_dirstate_tree: bool,
         on_disk: PyBytes,
     ) -> PyResult<PyObject> {
-        let dirstate_error = |e: DirstateError| {
-            PyErr::new::<exc::OSError, _>(py, format!("Dirstate error: {:?}", e))
-        };
         let (inner, parents) = if use_dirstate_tree {
             let (map, parents) = OwningDirstateMap::new_v1(py, on_disk)
-                .map_err(dirstate_error)?;
+                .map_err(|e| dirstate_error(py, e))?;
             (Box::new(map) as _, parents)
         } else {
             let bytes = on_disk.data(py);
             let mut map = RustDirstateMap::default();
-            let parents = map.read(bytes).map_err(dirstate_error)?;
+            let parents = map.read(bytes).map_err(|e| dirstate_error(py, e))?;
             (Box::new(map) as _, parents)
         };
         let map = Self::create_instance(py, inner)?;
@@ -550,19 +547,29 @@
         )
     }
 
-    def directories(&self) -> PyResult<PyList> {
+    def tracked_dirs(&self) -> PyResult<PyList> {
         let dirs = PyList::new(py, &[]);
-        for item in self.inner(py).borrow().iter_directories() {
-            let (path, mtime) = item.map_err(|e| v2_error(py, e))?;
+        for path in self.inner(py).borrow_mut().iter_tracked_dirs()
+            .map_err(|e |dirstate_error(py, e))?
+        {
+            let path = path.map_err(|e| v2_error(py, e))?;
             let path = PyBytes::new(py, path.as_bytes());
-            let mtime = mtime.map(|t| t.0).unwrap_or(-1);
-            let item = make_directory_item(py, mtime as i32)?;
-            let tuple = (path, item);
-            dirs.append(py, tuple.to_py_object(py).into_object())
+            dirs.append(py, path.into_object())
         }
         Ok(dirs)
     }
 
+    def debug_iter(&self) -> PyResult<PyList> {
+        let dirs = PyList::new(py, &[]);
+        for item in self.inner(py).borrow().debug_iter() {
+            let (path, (state, mode, size, mtime)) =
+                item.map_err(|e| v2_error(py, e))?;
+            let path = PyBytes::new(py, path.as_bytes());
+            let item = make_dirstate_item_raw(py, state, mode, size, mtime)?;
+            dirs.append(py, (path, item).to_py_object(py).into_object())
+        }
+        Ok(dirs)
+    }
 });
 
 impl DirstateMap {
@@ -616,3 +623,7 @@
 pub(super) fn v2_error(py: Python<'_>, _: DirstateV2ParseError) -> PyErr {
     PyErr::new::<exc::ValueError, _>(py, "corrupted dirstate-v2")
 }
+
+fn dirstate_error(py: Python<'_>, e: DirstateError) -> PyErr {
+    PyErr::new::<exc::OSError, _>(py, format!("Dirstate error: {:?}", e))
+}
--- a/rust/hg-cpython/src/dirstate/dispatch.rs	Thu Jul 15 23:02:17 2021 +0200
+++ b/rust/hg-cpython/src/dirstate/dispatch.rs	Fri Jul 16 14:08:26 2021 +0200
@@ -203,17 +203,30 @@
         self.get().iter()
     }
 
-    fn iter_directories(
+    fn iter_tracked_dirs(
+        &mut self,
+    ) -> Result<
+        Box<
+            dyn Iterator<Item = Result<&HgPath, DirstateV2ParseError>>
+                + Send
+                + '_,
+        >,
+        DirstateError,
+    > {
+        self.get_mut().iter_tracked_dirs()
+    }
+
+    fn debug_iter(
         &self,
     ) -> Box<
         dyn Iterator<
                 Item = Result<
-                    (&HgPath, Option<Timestamp>),
+                    (&HgPath, (u8, i32, i32, i32)),
                     DirstateV2ParseError,
                 >,
             > + Send
             + '_,
     > {
-        self.get().iter_directories()
+        self.get().debug_iter()
     }
 }
--- a/tests/test-completion.t	Thu Jul 15 23:02:17 2021 +0200
+++ b/tests/test-completion.t	Fri Jul 16 14:08:26 2021 +0200
@@ -284,7 +284,7 @@
   debugdate: extended
   debugdeltachain: changelog, manifest, dir, template
   debugdirstateignorepatternshash: 
-  debugdirstate: nodates, dates, datesort, dirs
+  debugdirstate: nodates, dates, datesort, all
   debugdiscovery: old, nonheads, rev, seed, local-as-revs, remote-as-revs, ssh, remotecmd, insecure, template
   debugdownload: output
   debugextensions: template
--- a/tests/test-status.t	Thu Jul 15 23:02:17 2021 +0200
+++ b/tests/test-status.t	Fri Jul 16 14:08:26 2021 +0200
@@ -929,23 +929,23 @@
 
 The cached mtime is initially unset
 
-  $ hg debugdirstate --dirs --no-dates | grep '^d'
-  d   0          0 unset               subdir
+  $ hg debugdirstate --all --no-dates | grep '^ '
+      0         -1 unset               subdir
 
 It is still not set when there are unknown files
 
   $ touch subdir/unknown
   $ hg status
   ? subdir/unknown
-  $ hg debugdirstate --dirs --no-dates | grep '^d'
-  d   0          0 unset               subdir
+  $ hg debugdirstate --all --no-dates | grep '^ '
+      0         -1 unset               subdir
 
 Now the directory is eligible for caching, so its mtime is save in the dirstate
 
   $ rm subdir/unknown
   $ hg status
-  $ hg debugdirstate --dirs --no-dates | grep '^d'
-  d   0          0 set                 subdir
+  $ hg debugdirstate --all --no-dates | grep '^ '
+      0         -1 set                 subdir
 
 This time the command should be ever so slightly faster since it does not need `read_dir("subdir")`
 
@@ -963,11 +963,11 @@
 Removing a node from the dirstate resets the cache for its parent directory
 
   $ hg forget subdir/a
-  $ hg debugdirstate --dirs --no-dates | grep '^d'
-  d   0          0 set                 subdir
+  $ hg debugdirstate --all --no-dates | grep '^ '
+      0         -1 set                 subdir
   $ hg ci -qm '#1'
-  $ hg debugdirstate --dirs --no-dates | grep '^d'
-  d   0          0 unset               subdir
+  $ hg debugdirstate --all --no-dates | grep '^ '
+      0         -1 unset               subdir
   $ hg status
   ? subdir/a