rust/hg-core/src/dirstate/dirstate_tree/iter.rs
changeset 45562 b51167d70f5a
child 45588 c35db907363d
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-core/src/dirstate/dirstate_tree/iter.rs	Fri Sep 25 17:51:34 2020 +0200
@@ -0,0 +1,358 @@
+// iter.rs
+//
+// Copyright 2020, Raphaël Gomès <rgomes@octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+use super::node::{Node, NodeKind};
+use super::tree::Tree;
+use crate::dirstate::dirstate_tree::node::Directory;
+use crate::dirstate::status::Dispatch;
+use crate::utils::hg_path::{hg_path_to_path_buf, HgPath, HgPathBuf};
+use crate::DirstateEntry;
+use std::borrow::Cow;
+use std::collections::VecDeque;
+use std::iter::{FromIterator, FusedIterator};
+use std::path::PathBuf;
+
+impl FromIterator<(HgPathBuf, DirstateEntry)> for Tree {
+    fn from_iter<T: IntoIterator<Item = (HgPathBuf, DirstateEntry)>>(iter: T) -> Self {
+        let mut tree = Self::new();
+        for (path, entry) in iter {
+            tree.insert(path, entry);
+        }
+        tree
+    }
+}
+
+/// Iterator of all entries in the dirstate tree.
+///
+/// It has no particular ordering.
+pub struct Iter<'a> {
+    to_visit: VecDeque<(Cow<'a, [u8]>, &'a Node)>,
+}
+
+impl<'a> Iter<'a> {
+    pub fn new(node: &'a Node) -> Iter<'a> {
+        let mut to_visit = VecDeque::new();
+        to_visit.push_back((Cow::Borrowed(&b""[..]), node));
+        Self { to_visit }
+    }
+}
+
+impl<'a> Iterator for Iter<'a> {
+    type Item = (HgPathBuf, DirstateEntry);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        while let Some((base_path, node)) = self.to_visit.pop_front() {
+            match &node.kind {
+                NodeKind::Directory(dir) => {
+                    add_children_to_visit(&mut self.to_visit, &base_path, &dir);
+                    if let Some(file) = &dir.was_file {
+                        return Some((HgPathBuf::from_bytes(&base_path), file.entry));
+                    }
+                }
+                NodeKind::File(file) => {
+                    if let Some(dir) = &file.was_directory {
+                        add_children_to_visit(&mut self.to_visit, &base_path, &dir);
+                    }
+                    return Some((HgPathBuf::from_bytes(&base_path), file.entry));
+                }
+            }
+        }
+        None
+    }
+}
+
+impl<'a> FusedIterator for Iter<'a> {}
+
+/// Iterator of all entries in the dirstate tree, with a special filesystem
+/// handling for the directories containing said entries.
+///
+/// It checks every directory on-disk to see if it has become a symlink, to
+/// prevent a potential security issue.
+/// Using this information, it may dispatch `status` information early: it
+/// returns canonical paths along with `Shortcut`s, which are either a
+/// `DirstateEntry` or a `Dispatch`, if the fate of said path has already been
+/// determined.
+///
+/// Like `Iter`, it has no particular ordering.
+pub struct FsIter<'a> {
+    root_dir: PathBuf,
+    to_visit: VecDeque<(Cow<'a, [u8]>, &'a Node)>,
+    shortcuts: VecDeque<(HgPathBuf, StatusShortcut)>,
+}
+
+impl<'a> FsIter<'a> {
+    pub fn new(node: &'a Node, root_dir: PathBuf) -> FsIter<'a> {
+        let mut to_visit = VecDeque::new();
+        to_visit.push_back((Cow::Borrowed(&b""[..]), node));
+        Self {
+            root_dir,
+            to_visit,
+            shortcuts: Default::default(),
+        }
+    }
+
+    /// Mercurial tracks symlinks but *not* what they point to.
+    /// If a directory is moved and symlinked:
+    ///
+    /// ```bash
+    /// $ mkdir foo
+    /// $ touch foo/a
+    /// $ # commit...
+    /// $ mv foo bar
+    /// $ ln -s bar foo
+    /// ```
+    /// We need to dispatch the new symlink as `Unknown` and all the
+    /// descendents of the directory it replace as `Deleted`.
+    fn dispatch_symlinked_directory(&mut self, path: impl AsRef<HgPath>, node: &Node) {
+        let path = path.as_ref();
+        self.shortcuts
+            .push_back((path.to_owned(), StatusShortcut::Dispatch(Dispatch::Unknown)));
+        for (file, _) in node.iter() {
+            self.shortcuts.push_back((
+                path.join(&file),
+                StatusShortcut::Dispatch(Dispatch::Deleted),
+            ));
+        }
+    }
+
+    /// Returns `true` if the canonical `path` of a directory corresponds to a
+    /// symlink on disk. It means it was moved and symlinked after the last
+    /// dirstate update.
+    ///
+    /// # Special cases
+    ///
+    /// Returns `false` for the repository root.
+    /// Returns `false` on io error, error handling is outside of the iterator.
+    fn directory_became_symlink(&mut self, path: &HgPath) -> bool {
+        if path.is_empty() {
+            return false;
+        }
+        let filename_as_path = match hg_path_to_path_buf(&path) {
+            Ok(p) => p,
+            _ => return false,
+        };
+        let meta = self.root_dir.join(filename_as_path).symlink_metadata();
+        match meta {
+            Ok(ref m) if m.file_type().is_symlink() => true,
+            _ => return false,
+        }
+    }
+}
+
+/// Returned by `FsIter`, since the `Dispatch` of any given entry may already
+/// be determined during the iteration. This is necessary for performance
+/// reasons, since hierarchical information is needed to `Dispatch` an entire
+/// subtree efficiently.
+#[derive(Debug, Copy, Clone)]
+pub enum StatusShortcut {
+    /// A entry in the dirstate for further inspection
+    Entry(DirstateEntry),
+    /// The result of the status of the corresponding file
+    Dispatch(Dispatch),
+}
+
+impl<'a> Iterator for FsIter<'a> {
+    type Item = (HgPathBuf, StatusShortcut);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        // If any paths have already been `Dispatch`-ed, return them
+        while let Some(res) = self.shortcuts.pop_front() {
+            return Some(res);
+        }
+
+        while let Some((base_path, node)) = self.to_visit.pop_front() {
+            match &node.kind {
+                NodeKind::Directory(dir) => {
+                    let canonical_path = HgPath::new(&base_path);
+                    if self.directory_became_symlink(canonical_path) {
+                        // Potential security issue, don't do a normal
+                        // traversal, force the results.
+                        self.dispatch_symlinked_directory(canonical_path, &node);
+                        continue;
+                    }
+                    add_children_to_visit(&mut self.to_visit, &base_path, &dir);
+                    if let Some(file) = &dir.was_file {
+                        return Some((
+                            HgPathBuf::from_bytes(&base_path),
+                            StatusShortcut::Entry(file.entry),
+                        ));
+                    }
+                }
+                NodeKind::File(file) => {
+                    if let Some(dir) = &file.was_directory {
+                        add_children_to_visit(&mut self.to_visit, &base_path, &dir);
+                    }
+                    return Some((
+                        HgPathBuf::from_bytes(&base_path),
+                        StatusShortcut::Entry(file.entry),
+                    ));
+                }
+            }
+        }
+
+        None
+    }
+}
+
+impl<'a> FusedIterator for FsIter<'a> {}
+
+fn join_path<'a, 'b>(path: &'a [u8], other: &'b [u8]) -> Cow<'b, [u8]> {
+    if path.is_empty() {
+        other.into()
+    } else {
+        [path, &b"/"[..], other].concat().into()
+    }
+}
+
+/// Adds all children of a given directory `dir` to the visit queue `to_visit`
+/// prefixed by a `base_path`.
+fn add_children_to_visit<'a>(
+    to_visit: &mut VecDeque<(Cow<'a, [u8]>, &'a Node)>,
+    base_path: &[u8],
+    dir: &'a Directory,
+) {
+    to_visit.extend(dir.children.iter().map(|(path, child)| {
+        let full_path = join_path(&base_path, &path);
+        (Cow::from(full_path), child)
+    }));
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::utils::hg_path::HgPath;
+    use crate::{EntryState, FastHashMap};
+    use std::collections::HashSet;
+
+    #[test]
+    fn test_iteration() {
+        let mut tree = Tree::new();
+
+        assert_eq!(
+            tree.insert(
+                HgPathBuf::from_bytes(b"foo/bar"),
+                DirstateEntry {
+                    state: EntryState::Merged,
+                    mode: 41,
+                    mtime: 42,
+                    size: 43,
+                }
+            ),
+            None
+        );
+
+        assert_eq!(
+            tree.insert(
+                HgPathBuf::from_bytes(b"foo2"),
+                DirstateEntry {
+                    state: EntryState::Merged,
+                    mode: 40,
+                    mtime: 41,
+                    size: 42,
+                }
+            ),
+            None
+        );
+
+        assert_eq!(
+            tree.insert(
+                HgPathBuf::from_bytes(b"foo/baz"),
+                DirstateEntry {
+                    state: EntryState::Normal,
+                    mode: 0,
+                    mtime: 0,
+                    size: 0,
+                }
+            ),
+            None
+        );
+
+        assert_eq!(
+            tree.insert(
+                HgPathBuf::from_bytes(b"foo/bap/nested"),
+                DirstateEntry {
+                    state: EntryState::Normal,
+                    mode: 0,
+                    mtime: 0,
+                    size: 0,
+                }
+            ),
+            None
+        );
+
+        assert_eq!(tree.len(), 4);
+
+        let results: HashSet<_> = tree.iter().map(|(c, _)| c.to_owned()).collect();
+        dbg!(&results);
+        assert!(results.contains(HgPath::new(b"foo2")));
+        assert!(results.contains(HgPath::new(b"foo/bar")));
+        assert!(results.contains(HgPath::new(b"foo/baz")));
+        assert!(results.contains(HgPath::new(b"foo/bap/nested")));
+
+        let mut iter = tree.iter();
+        assert!(iter.next().is_some());
+        assert!(iter.next().is_some());
+        assert!(iter.next().is_some());
+        assert!(iter.next().is_some());
+        assert_eq!(None, iter.next());
+        assert_eq!(None, iter.next());
+        drop(iter);
+
+        assert_eq!(
+            tree.insert(
+                HgPathBuf::from_bytes(b"foo/bap/nested/a"),
+                DirstateEntry {
+                    state: EntryState::Normal,
+                    mode: 0,
+                    mtime: 0,
+                    size: 0,
+                }
+            ),
+            None
+        );
+
+        let results: FastHashMap<_, _> = tree.iter().collect();
+        assert!(results.contains_key(HgPath::new(b"foo2")));
+        assert!(results.contains_key(HgPath::new(b"foo/bar")));
+        assert!(results.contains_key(HgPath::new(b"foo/baz")));
+        // Is a dir but `was_file`, so it's listed as a removed file
+        assert!(results.contains_key(HgPath::new(b"foo/bap/nested")));
+        assert!(results.contains_key(HgPath::new(b"foo/bap/nested/a")));
+
+        // insert removed file (now directory) after nested file
+        assert_eq!(
+            tree.insert(
+                HgPathBuf::from_bytes(b"a/a"),
+                DirstateEntry {
+                    state: EntryState::Normal,
+                    mode: 0,
+                    mtime: 0,
+                    size: 0,
+                }
+            ),
+            None
+        );
+
+        // `insert` returns `None` for a directory
+        assert_eq!(
+            tree.insert(
+                HgPathBuf::from_bytes(b"a"),
+                DirstateEntry {
+                    state: EntryState::Removed,
+                    mode: 0,
+                    mtime: 0,
+                    size: 0,
+                }
+            ),
+            None
+        );
+
+        let results: FastHashMap<_, _> = tree.iter().collect();
+        assert!(results.contains_key(HgPath::new(b"a")));
+        assert!(results.contains_key(HgPath::new(b"a/a")));
+    }
+}