rust/hg-core/src/dirstate/dirstate_tree/tree.rs
changeset 45562 b51167d70f5a
child 45588 c35db907363d
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-core/src/dirstate/dirstate_tree/tree.rs	Fri Sep 25 17:51:34 2020 +0200
@@ -0,0 +1,661 @@
+// tree.rs
+//
+// Copyright 2020, Raphaël Gomès <rgomes@octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+use super::iter::Iter;
+use super::node::{Directory, Node, NodeKind};
+use crate::dirstate::dirstate_tree::iter::FsIter;
+use crate::dirstate::dirstate_tree::node::{InsertResult, RemoveResult};
+use crate::utils::hg_path::{HgPath, HgPathBuf};
+use crate::DirstateEntry;
+use std::path::PathBuf;
+
+/// A specialized tree to represent the Mercurial dirstate.
+///
+/// # Advantages over a flat structure
+///
+/// The dirstate is inherently hierarchical, since it's a representation of the
+/// file structure of the project. The current dirstate format is flat, and
+/// while that affords us potentially great (unordered) iteration speeds, the
+/// need to retrieve a given path is great enough that you need some kind of
+/// hashmap or tree in a lot of cases anyway.
+///
+/// Going with a tree allows us to be smarter:
+///   - Skipping an ignored directory means we don't visit its entire subtree
+///   - Security auditing does not need to reconstruct paths backwards to check
+///     for symlinked directories, this can be done during the iteration in a
+///     very efficient fashion
+///   - We don't need to build the directory information in another struct,
+///     simplifying the code a lot, reducing the memory footprint and
+///     potentially going faster depending on the implementation.
+///   - We can use it to store a (platform-dependent) caching mechanism [1]
+///   - And probably other types of optimizations.
+///
+/// Only the first two items in this list are implemented as of this commit.
+///
+/// [1]: https://www.mercurial-scm.org/wiki/DirsCachePlan
+///       
+///
+/// # Structure
+///
+/// It's a prefix (radix) tree with no fixed arity, with a granularity of a
+/// folder, allowing it to mimic a filesystem hierarchy:
+///
+/// ```text
+/// foo/bar
+/// foo/baz
+/// test
+/// ```
+/// Will be represented (simplified) by:
+///
+/// ```text
+/// Directory(root):
+///   - File("test")
+///   - Directory("foo"):
+///     - File("bar")
+///     - File("baz")
+/// ```
+///
+/// Moreover, it is special-cased for storing the dirstate and as such handles
+/// cases that a simple `HashMap` would handle, but while preserving the
+/// hierarchy.
+/// For example:
+///
+/// ```shell
+/// $ touch foo
+/// $ hg add foo
+/// $ hg commit -m "foo"
+/// $ hg remove foo
+/// $ rm foo
+/// $ mkdir foo
+/// $ touch foo/a
+/// $ hg add foo/a
+/// $ hg status
+///   R foo
+///   A foo/a
+/// ```
+/// To represent this in a tree, one needs to keep track of whether any given
+/// file was a directory and whether any given directory was a file at the last
+/// dirstate update. This tree stores that information, but only in the right
+/// circumstances by respecting the high-level rules that prevent nonsensical
+/// structures to exist:
+///     - a file can only be added as a child of another file if the latter is
+///       marked as `Removed`
+///     - a file cannot replace a folder unless all its descendents are removed
+///
+/// This second rule is not checked by the tree for performance reasons, and
+/// because high-level logic already prevents that state from happening.
+///
+/// # Ordering
+///
+/// It makes no guarantee of ordering for now.
+#[derive(Debug, Default, Clone, PartialEq)]
+pub struct Tree {
+    pub root: Node,
+    files_count: usize,
+}
+
+impl Tree {
+    pub fn new() -> Self {
+        Self {
+            root: Node {
+                kind: NodeKind::Directory(Directory {
+                    was_file: None,
+                    children: Default::default(),
+                }),
+            },
+            files_count: 0,
+        }
+    }
+
+    /// How many files (not directories) are stored in the tree, including ones
+    /// marked as `Removed`.
+    pub fn len(&self) -> usize {
+        self.files_count
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Inserts a file in the tree and returns the previous entry if any.
+    pub fn insert(
+        &mut self,
+        path: impl AsRef<HgPath>,
+        kind: DirstateEntry,
+    ) -> Option<DirstateEntry> {
+        let old = self.insert_node(path, kind);
+        match old?.kind {
+            NodeKind::Directory(_) => None,
+            NodeKind::File(f) => Some(f.entry),
+        }
+    }
+
+    /// Low-level insertion method that returns the previous node (directories
+    /// included).
+    fn insert_node(&mut self, path: impl AsRef<HgPath>, kind: DirstateEntry) -> Option<Node> {
+        let InsertResult {
+            did_insert,
+            old_entry,
+        } = self.root.insert(path.as_ref().as_bytes(), kind);
+        self.files_count += if did_insert { 1 } else { 0 };
+        old_entry
+    }
+
+    /// Returns a reference to a node if it exists.
+    pub fn get_node(&self, path: impl AsRef<HgPath>) -> Option<&Node> {
+        self.root.get(path.as_ref().as_bytes())
+    }
+
+    /// Returns a reference to the entry corresponding to `path` if it exists.
+    pub fn get(&self, path: impl AsRef<HgPath>) -> Option<&DirstateEntry> {
+        if let Some(node) = self.get_node(&path) {
+            return match &node.kind {
+                NodeKind::Directory(d) => d.was_file.as_ref().map(|f| &f.entry),
+                NodeKind::File(f) => Some(&f.entry),
+            };
+        }
+        None
+    }
+
+    /// Returns `true` if an entry is found for the given `path`.
+    pub fn contains_key(&self, path: impl AsRef<HgPath>) -> bool {
+        self.get(path).is_some()
+    }
+
+    /// Returns a mutable reference to the entry corresponding to `path` if it
+    /// exists.
+    pub fn get_mut(&mut self, path: impl AsRef<HgPath>) -> Option<&mut DirstateEntry> {
+        if let Some(kind) = self.root.get_mut(path.as_ref().as_bytes()) {
+            return match kind {
+                NodeKind::Directory(d) => d.was_file.as_mut().map(|f| &mut f.entry),
+                NodeKind::File(f) => Some(&mut f.entry),
+            };
+        }
+        None
+    }
+
+    /// Returns an iterator over the paths and corresponding entries in the
+    /// tree.
+    pub fn iter(&self) -> Iter {
+        Iter::new(&self.root)
+    }
+
+    /// Returns an iterator of all entries in the tree, with a special
+    /// filesystem handling for the directories containing said entries. See
+    /// the documentation of `FsIter` for more.
+    pub fn fs_iter(&self, root_dir: PathBuf) -> FsIter {
+        FsIter::new(&self.root, root_dir)
+    }
+
+    /// Remove the entry at `path` and returns it, if it exists.
+    pub fn remove(&mut self, path: impl AsRef<HgPath>) -> Option<DirstateEntry> {
+        let RemoveResult { old_entry, .. } = self.root.remove(path.as_ref().as_bytes());
+        self.files_count = self
+            .files_count
+            .checked_sub(if old_entry.is_some() { 1 } else { 0 })
+            .expect("removed too many files");
+        old_entry
+    }
+}
+
+impl<P: AsRef<HgPath>> Extend<(P, DirstateEntry)> for Tree {
+    fn extend<T: IntoIterator<Item = (P, DirstateEntry)>>(&mut self, iter: T) {
+        for (path, entry) in iter {
+            self.insert(path, entry);
+        }
+    }
+}
+
+impl<'a> IntoIterator for &'a Tree {
+    type Item = (HgPathBuf, DirstateEntry);
+    type IntoIter = Iter<'a>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.iter()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::dirstate::dirstate_tree::node::File;
+    use crate::{EntryState, FastHashMap};
+    use pretty_assertions::assert_eq;
+
+    impl Node {
+        /// Shortcut for getting children of a node in tests.
+        fn children(&self) -> Option<&FastHashMap<Vec<u8>, Node>> {
+            match &self.kind {
+                NodeKind::Directory(d) => Some(&d.children),
+                NodeKind::File(_) => None,
+            }
+        }
+    }
+
+    #[test]
+    fn test_dirstate_tree() {
+        let mut tree = Tree::new();
+
+        assert_eq!(
+            tree.insert_node(
+                HgPath::new(b"we/p"),
+                DirstateEntry {
+                    state: EntryState::Normal,
+                    mode: 0,
+                    mtime: 0,
+                    size: 0
+                }
+            ),
+            None
+        );
+        dbg!(&tree);
+        assert!(tree.get_node(HgPath::new(b"we")).is_some());
+        let entry = DirstateEntry {
+            state: EntryState::Merged,
+            mode: 41,
+            mtime: 42,
+            size: 43,
+        };
+        assert_eq!(tree.insert_node(HgPath::new(b"foo/bar"), entry), None);
+        assert_eq!(
+            tree.get_node(HgPath::new(b"foo/bar")),
+            Some(&Node {
+                kind: NodeKind::File(File {
+                    was_directory: None,
+                    entry
+                })
+            })
+        );
+        // We didn't override the first entry we made
+        assert!(tree.get_node(HgPath::new(b"we")).is_some(),);
+        // Inserting the same key again
+        assert_eq!(
+            tree.insert_node(HgPath::new(b"foo/bar"), entry),
+            Some(Node {
+                kind: NodeKind::File(File {
+                    was_directory: None,
+                    entry
+                }),
+            })
+        );
+        // Inserting the two levels deep
+        assert_eq!(tree.insert_node(HgPath::new(b"foo/bar/baz"), entry), None);
+        // Getting a file "inside a file" should return `None`
+        assert_eq!(tree.get_node(HgPath::new(b"foo/bar/baz/bap"),), None);
+
+        assert_eq!(
+            tree.insert_node(HgPath::new(b"wasdir/subfile"), entry),
+            None,
+        );
+        let removed_entry = DirstateEntry {
+            state: EntryState::Removed,
+            mode: 0,
+            mtime: 0,
+            size: 0,
+        };
+        assert!(tree
+            .insert_node(HgPath::new(b"wasdir"), removed_entry)
+            .is_some());
+
+        assert_eq!(
+            tree.get_node(HgPath::new(b"wasdir")),
+            Some(&Node {
+                kind: NodeKind::File(File {
+                    was_directory: Some(Box::new(Directory {
+                        was_file: None,
+                        children: [(
+                            b"subfile".to_vec(),
+                            Node {
+                                kind: NodeKind::File(File {
+                                    was_directory: None,
+                                    entry,
+                                })
+                            }
+                        )]
+                        .to_vec()
+                        .into_iter()
+                        .collect()
+                    })),
+                    entry: removed_entry
+                })
+            })
+        );
+
+        assert!(tree.get(HgPath::new(b"wasdir/subfile")).is_some())
+    }
+
+    #[test]
+    fn test_insert_removed() {
+        let mut tree = Tree::new();
+        let entry = DirstateEntry {
+            state: EntryState::Merged,
+            mode: 1,
+            mtime: 2,
+            size: 3,
+        };
+        let removed_entry = DirstateEntry {
+            state: EntryState::Removed,
+            mode: 10,
+            mtime: 20,
+            size: 30,
+        };
+        assert_eq!(tree.insert_node(HgPath::new(b"foo"), entry), None);
+        assert_eq!(tree.insert_node(HgPath::new(b"foo/a"), removed_entry), None);
+        // The insert should not turn `foo` into a directory as `foo` is not
+        // `Removed`.
+        match tree.get_node(HgPath::new(b"foo")).unwrap().kind {
+            NodeKind::Directory(_) => panic!("should be a file"),
+            NodeKind::File(_) => {}
+        }
+
+        let mut tree = Tree::new();
+        let entry = DirstateEntry {
+            state: EntryState::Merged,
+            mode: 1,
+            mtime: 2,
+            size: 3,
+        };
+        let removed_entry = DirstateEntry {
+            state: EntryState::Removed,
+            mode: 10,
+            mtime: 20,
+            size: 30,
+        };
+        // The insert *should* turn `foo` into a directory as it is `Removed`.
+        assert_eq!(tree.insert_node(HgPath::new(b"foo"), removed_entry), None);
+        assert_eq!(tree.insert_node(HgPath::new(b"foo/a"), entry), None);
+        match tree.get_node(HgPath::new(b"foo")).unwrap().kind {
+            NodeKind::Directory(_) => {}
+            NodeKind::File(_) => panic!("should be a directory"),
+        }
+    }
+
+    #[test]
+    fn test_get() {
+        let mut tree = Tree::new();
+        let entry = DirstateEntry {
+            state: EntryState::Merged,
+            mode: 1,
+            mtime: 2,
+            size: 3,
+        };
+        assert_eq!(tree.insert_node(HgPath::new(b"a/b/c"), entry), None);
+        assert_eq!(tree.files_count, 1);
+        assert_eq!(tree.get(HgPath::new(b"a/b/c")), Some(&entry));
+        assert_eq!(tree.get(HgPath::new(b"a/b")), None);
+        assert_eq!(tree.get(HgPath::new(b"a")), None);
+        assert_eq!(tree.get(HgPath::new(b"a/b/c/d")), None);
+        let entry2 = DirstateEntry {
+            state: EntryState::Removed,
+            mode: 0,
+            mtime: 5,
+            size: 1,
+        };
+        // was_directory
+        assert_eq!(tree.insert(HgPath::new(b"a/b"), entry2), None);
+        assert_eq!(tree.files_count, 2);
+        assert_eq!(tree.get(HgPath::new(b"a/b")), Some(&entry2));
+        assert_eq!(tree.get(HgPath::new(b"a/b/c")), Some(&entry));
+
+        let mut tree = Tree::new();
+
+        // was_file
+        assert_eq!(tree.insert_node(HgPath::new(b"a"), entry), None);
+        assert_eq!(tree.files_count, 1);
+        assert_eq!(tree.insert_node(HgPath::new(b"a/b"), entry2), None);
+        assert_eq!(tree.files_count, 2);
+        assert_eq!(tree.get(HgPath::new(b"a/b")), Some(&entry2));
+    }
+
+    #[test]
+    fn test_get_mut() {
+        let mut tree = Tree::new();
+        let mut entry = DirstateEntry {
+            state: EntryState::Merged,
+            mode: 1,
+            mtime: 2,
+            size: 3,
+        };
+        assert_eq!(tree.insert_node(HgPath::new(b"a/b/c"), entry), None);
+        assert_eq!(tree.files_count, 1);
+        assert_eq!(tree.get_mut(HgPath::new(b"a/b/c")), Some(&mut entry));
+        assert_eq!(tree.get_mut(HgPath::new(b"a/b")), None);
+        assert_eq!(tree.get_mut(HgPath::new(b"a")), None);
+        assert_eq!(tree.get_mut(HgPath::new(b"a/b/c/d")), None);
+        let mut entry2 = DirstateEntry {
+            state: EntryState::Removed,
+            mode: 0,
+            mtime: 5,
+            size: 1,
+        };
+        // was_directory
+        assert_eq!(tree.insert(HgPath::new(b"a/b"), entry2), None);
+        assert_eq!(tree.files_count, 2);
+        assert_eq!(tree.get_mut(HgPath::new(b"a/b")), Some(&mut entry2));
+        assert_eq!(tree.get_mut(HgPath::new(b"a/b/c")), Some(&mut entry));
+
+        let mut tree = Tree::new();
+
+        // was_file
+        assert_eq!(tree.insert_node(HgPath::new(b"a"), entry), None);
+        assert_eq!(tree.files_count, 1);
+        assert_eq!(tree.insert_node(HgPath::new(b"a/b"), entry2), None);
+        assert_eq!(tree.files_count, 2);
+        assert_eq!(tree.get_mut(HgPath::new(b"a/b")), Some(&mut entry2));
+    }
+
+    #[test]
+    fn test_remove() {
+        let mut tree = Tree::new();
+        assert_eq!(tree.files_count, 0);
+        assert_eq!(tree.remove(HgPath::new(b"foo")), None);
+        assert_eq!(tree.files_count, 0);
+
+        let entry = DirstateEntry {
+            state: EntryState::Normal,
+            mode: 0,
+            mtime: 0,
+            size: 0,
+        };
+        assert_eq!(tree.insert_node(HgPath::new(b"a/b/c"), entry), None);
+        assert_eq!(tree.files_count, 1);
+
+        assert_eq!(tree.remove(HgPath::new(b"a/b/c")), Some(entry));
+        assert_eq!(tree.files_count, 0);
+
+        assert_eq!(tree.insert_node(HgPath::new(b"a/b/x"), entry), None);
+        assert_eq!(tree.insert_node(HgPath::new(b"a/b/y"), entry), None);
+        assert_eq!(tree.insert_node(HgPath::new(b"a/b/z"), entry), None);
+        assert_eq!(tree.insert_node(HgPath::new(b"x"), entry), None);
+        assert_eq!(tree.insert_node(HgPath::new(b"y"), entry), None);
+        assert_eq!(tree.files_count, 5);
+
+        assert_eq!(tree.remove(HgPath::new(b"a/b/x")), Some(entry));
+        assert_eq!(tree.files_count, 4);
+        assert_eq!(tree.remove(HgPath::new(b"a/b/x")), None);
+        assert_eq!(tree.files_count, 4);
+        assert_eq!(tree.remove(HgPath::new(b"a/b/y")), Some(entry));
+        assert_eq!(tree.files_count, 3);
+        assert_eq!(tree.remove(HgPath::new(b"a/b/z")), Some(entry));
+        assert_eq!(tree.files_count, 2);
+
+        assert_eq!(tree.remove(HgPath::new(b"x")), Some(entry));
+        assert_eq!(tree.files_count, 1);
+        assert_eq!(tree.remove(HgPath::new(b"y")), Some(entry));
+        assert_eq!(tree.files_count, 0);
+
+        // `a` should have been cleaned up, no more files anywhere in its
+        // descendents
+        assert_eq!(tree.get_node(HgPath::new(b"a")), None);
+        assert_eq!(tree.root.children().unwrap().len(), 0);
+
+        let removed_entry = DirstateEntry {
+            state: EntryState::Removed,
+            ..entry
+        };
+        assert_eq!(tree.insert(HgPath::new(b"a"), removed_entry), None);
+        assert_eq!(tree.insert_node(HgPath::new(b"a/b/x"), entry), None);
+        assert_eq!(tree.files_count, 2);
+        dbg!(&tree);
+        assert_eq!(tree.remove(HgPath::new(b"a")), Some(removed_entry));
+        assert_eq!(tree.files_count, 1);
+        dbg!(&tree);
+        assert_eq!(tree.remove(HgPath::new(b"a/b/x")), Some(entry));
+        assert_eq!(tree.files_count, 0);
+
+        // The entire tree should have been cleaned up, no more files anywhere
+        // in its descendents
+        assert_eq!(tree.root.children().unwrap().len(), 0);
+
+        let removed_entry = DirstateEntry {
+            state: EntryState::Removed,
+            ..entry
+        };
+        assert_eq!(tree.insert(HgPath::new(b"a"), entry), None);
+        assert_eq!(tree.insert_node(HgPath::new(b"a/b/x"), removed_entry), None);
+        assert_eq!(tree.files_count, 2);
+        dbg!(&tree);
+        assert_eq!(tree.remove(HgPath::new(b"a")), Some(entry));
+        assert_eq!(tree.files_count, 1);
+        dbg!(&tree);
+        assert_eq!(tree.remove(HgPath::new(b"a/b/x")), Some(removed_entry));
+        assert_eq!(tree.files_count, 0);
+
+        dbg!(&tree);
+        // The entire tree should have been cleaned up, no more files anywhere
+        // in its descendents
+        assert_eq!(tree.root.children().unwrap().len(), 0);
+
+        assert_eq!(tree.insert(HgPath::new(b"d"), entry), None);
+        assert_eq!(tree.insert(HgPath::new(b"d/d/d"), entry), None);
+        assert_eq!(tree.files_count, 2);
+
+        // Deleting the nested file should not delete the top directory as it
+        // used to be a file
+        assert_eq!(tree.remove(HgPath::new(b"d/d/d")), Some(entry));
+        assert_eq!(tree.files_count, 1);
+        assert!(tree.get_node(HgPath::new(b"d")).is_some());
+        assert!(tree.remove(HgPath::new(b"d")).is_some());
+        assert_eq!(tree.files_count, 0);
+
+        // Deleting the nested file should not delete the top file (other way
+        // around from the last case)
+        assert_eq!(tree.insert(HgPath::new(b"a/a"), entry), None);
+        assert_eq!(tree.files_count, 1);
+        assert_eq!(tree.insert(HgPath::new(b"a"), entry), None);
+        assert_eq!(tree.files_count, 2);
+        dbg!(&tree);
+        assert_eq!(tree.remove(HgPath::new(b"a/a")), Some(entry));
+        assert_eq!(tree.files_count, 1);
+        dbg!(&tree);
+        assert!(tree.get_node(HgPath::new(b"a")).is_some());
+        assert!(tree.get_node(HgPath::new(b"a/a")).is_none());
+    }
+
+    #[test]
+    fn test_was_directory() {
+        let mut tree = Tree::new();
+
+        let entry = DirstateEntry {
+            state: EntryState::Removed,
+            mode: 0,
+            mtime: 0,
+            size: 0,
+        };
+        assert_eq!(tree.insert_node(HgPath::new(b"a/b/c"), entry), None);
+        assert_eq!(tree.files_count, 1);
+
+        assert!(tree.insert_node(HgPath::new(b"a"), entry).is_some());
+        let new_a = tree.root.children().unwrap().get(&b"a".to_vec()).unwrap();
+
+        match &new_a.kind {
+            NodeKind::Directory(_) => panic!(),
+            NodeKind::File(f) => {
+                let dir = f.was_directory.clone().unwrap();
+                let c = dir
+                    .children
+                    .get(&b"b".to_vec())
+                    .unwrap()
+                    .children()
+                    .unwrap()
+                    .get(&b"c".to_vec())
+                    .unwrap();
+
+                assert_eq!(
+                    match &c.kind {
+                        NodeKind::Directory(_) => panic!(),
+                        NodeKind::File(f) => f.entry,
+                    },
+                    entry
+                );
+            }
+        }
+        assert_eq!(tree.files_count, 2);
+        dbg!(&tree);
+        assert_eq!(tree.remove(HgPath::new(b"a/b/c")), Some(entry));
+        assert_eq!(tree.files_count, 1);
+        dbg!(&tree);
+        let a = tree.get_node(HgPath::new(b"a")).unwrap();
+        match &a.kind {
+            NodeKind::Directory(_) => panic!(),
+            NodeKind::File(f) => {
+                // Directory in `was_directory` was emptied, should be removed
+                assert_eq!(f.was_directory, None);
+            }
+        }
+    }
+    #[test]
+    fn test_extend() {
+        let insertions = [
+            (
+                HgPathBuf::from_bytes(b"d"),
+                DirstateEntry {
+                    state: EntryState::Added,
+                    mode: 0,
+                    mtime: -1,
+                    size: -1,
+                },
+            ),
+            (
+                HgPathBuf::from_bytes(b"b"),
+                DirstateEntry {
+                    state: EntryState::Normal,
+                    mode: 33188,
+                    mtime: 1599647984,
+                    size: 2,
+                },
+            ),
+            (
+                HgPathBuf::from_bytes(b"a/a"),
+                DirstateEntry {
+                    state: EntryState::Normal,
+                    mode: 33188,
+                    mtime: 1599647984,
+                    size: 2,
+                },
+            ),
+            (
+                HgPathBuf::from_bytes(b"d/d/d"),
+                DirstateEntry {
+                    state: EntryState::Removed,
+                    mode: 0,
+                    mtime: 0,
+                    size: 0,
+                },
+            ),
+        ]
+        .to_vec();
+        let mut tree = Tree::new();
+
+        tree.extend(insertions.clone().into_iter());
+
+        for (path, _) in &insertions {
+            assert!(tree.contains_key(path), true);
+        }
+        assert_eq!(tree.files_count, 4);
+    }
+}