rust/hg-core/src/dirstate/dirstate_tree/tree.rs
changeset 46890 441024b279a6
parent 46889 8759e22f1649
child 46891 c6ceb5f27f97
--- a/rust/hg-core/src/dirstate/dirstate_tree/tree.rs	Sun Sep 13 22:14:25 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,682 +0,0 @@
-// tree.rs
-//
-// Copyright 2020, Raphaël Gomès <rgomes@octobus.net>
-//
-// This software may be used and distributed according to the terms of the
-// GNU General Public License version 2 or any later version.
-
-use super::iter::Iter;
-use super::node::{Directory, Node, NodeKind};
-use crate::dirstate::dirstate_tree::iter::FsIter;
-use crate::dirstate::dirstate_tree::node::{InsertResult, RemoveResult};
-use crate::utils::hg_path::{HgPath, HgPathBuf};
-use crate::DirstateEntry;
-use std::path::PathBuf;
-
-/// A specialized tree to represent the Mercurial dirstate.
-///
-/// # Advantages over a flat structure
-///
-/// The dirstate is inherently hierarchical, since it's a representation of the
-/// file structure of the project. The current dirstate format is flat, and
-/// while that affords us potentially great (unordered) iteration speeds, the
-/// need to retrieve a given path is great enough that you need some kind of
-/// hashmap or tree in a lot of cases anyway.
-///
-/// Going with a tree allows us to be smarter:
-///   - Skipping an ignored directory means we don't visit its entire subtree
-///   - Security auditing does not need to reconstruct paths backwards to check
-///     for symlinked directories, this can be done during the iteration in a
-///     very efficient fashion
-///   - We don't need to build the directory information in another struct,
-///     simplifying the code a lot, reducing the memory footprint and
-///     potentially going faster depending on the implementation.
-///   - We can use it to store a (platform-dependent) caching mechanism [1]
-///   - And probably other types of optimizations.
-///
-/// Only the first two items in this list are implemented as of this commit.
-///
-/// [1]: https://www.mercurial-scm.org/wiki/DirsCachePlan
-///
-///
-/// # Structure
-///
-/// It's a prefix (radix) tree with no fixed arity, with a granularity of a
-/// folder, allowing it to mimic a filesystem hierarchy:
-///
-/// ```text
-/// foo/bar
-/// foo/baz
-/// test
-/// ```
-/// Will be represented (simplified) by:
-///
-/// ```text
-/// Directory(root):
-///   - File("test")
-///   - Directory("foo"):
-///     - File("bar")
-///     - File("baz")
-/// ```
-///
-/// Moreover, it is special-cased for storing the dirstate and as such handles
-/// cases that a simple `HashMap` would handle, but while preserving the
-/// hierarchy.
-/// For example:
-///
-/// ```shell
-/// $ touch foo
-/// $ hg add foo
-/// $ hg commit -m "foo"
-/// $ hg remove foo
-/// $ rm foo
-/// $ mkdir foo
-/// $ touch foo/a
-/// $ hg add foo/a
-/// $ hg status
-///   R foo
-///   A foo/a
-/// ```
-/// To represent this in a tree, one needs to keep track of whether any given
-/// file was a directory and whether any given directory was a file at the last
-/// dirstate update. This tree stores that information, but only in the right
-/// circumstances by respecting the high-level rules that prevent nonsensical
-/// structures to exist:
-///     - a file can only be added as a child of another file if the latter is
-///       marked as `Removed`
-///     - a file cannot replace a folder unless all its descendents are removed
-///
-/// This second rule is not checked by the tree for performance reasons, and
-/// because high-level logic already prevents that state from happening.
-///
-/// # Ordering
-///
-/// It makes no guarantee of ordering for now.
-#[derive(Debug, Default, Clone, PartialEq)]
-pub struct Tree {
-    pub root: Node,
-    files_count: usize,
-}
-
-impl Tree {
-    pub fn new() -> Self {
-        Self {
-            root: Node {
-                kind: NodeKind::Directory(Directory {
-                    was_file: None,
-                    children: Default::default(),
-                }),
-            },
-            files_count: 0,
-        }
-    }
-
-    /// How many files (not directories) are stored in the tree, including ones
-    /// marked as `Removed`.
-    pub fn len(&self) -> usize {
-        self.files_count
-    }
-
-    pub fn is_empty(&self) -> bool {
-        self.len() == 0
-    }
-
-    /// Inserts a file in the tree and returns the previous entry if any.
-    pub fn insert(
-        &mut self,
-        path: impl AsRef<HgPath>,
-        kind: DirstateEntry,
-    ) -> Option<DirstateEntry> {
-        let old = self.insert_node(path, kind);
-        match old?.kind {
-            NodeKind::Directory(_) => None,
-            NodeKind::File(f) => Some(f.entry),
-        }
-    }
-
-    /// Low-level insertion method that returns the previous node (directories
-    /// included).
-    fn insert_node(
-        &mut self,
-        path: impl AsRef<HgPath>,
-        kind: DirstateEntry,
-    ) -> Option<Node> {
-        let InsertResult {
-            did_insert,
-            old_entry,
-        } = self.root.insert(path.as_ref().as_bytes(), kind);
-        self.files_count += if did_insert { 1 } else { 0 };
-        old_entry
-    }
-
-    /// Returns a reference to a node if it exists.
-    pub fn get_node(&self, path: impl AsRef<HgPath>) -> Option<&Node> {
-        self.root.get(path.as_ref().as_bytes())
-    }
-
-    /// Returns a reference to the entry corresponding to `path` if it exists.
-    pub fn get(&self, path: impl AsRef<HgPath>) -> Option<&DirstateEntry> {
-        if let Some(node) = self.get_node(&path) {
-            return match &node.kind {
-                NodeKind::Directory(d) => {
-                    d.was_file.as_ref().map(|f| &f.entry)
-                }
-                NodeKind::File(f) => Some(&f.entry),
-            };
-        }
-        None
-    }
-
-    /// Returns `true` if an entry is found for the given `path`.
-    pub fn contains_key(&self, path: impl AsRef<HgPath>) -> bool {
-        self.get(path).is_some()
-    }
-
-    /// Returns a mutable reference to the entry corresponding to `path` if it
-    /// exists.
-    pub fn get_mut(
-        &mut self,
-        path: impl AsRef<HgPath>,
-    ) -> Option<&mut DirstateEntry> {
-        if let Some(kind) = self.root.get_mut(path.as_ref().as_bytes()) {
-            return match kind {
-                NodeKind::Directory(d) => {
-                    d.was_file.as_mut().map(|f| &mut f.entry)
-                }
-                NodeKind::File(f) => Some(&mut f.entry),
-            };
-        }
-        None
-    }
-
-    /// Returns an iterator over the paths and corresponding entries in the
-    /// tree.
-    pub fn iter(&self) -> Iter {
-        Iter::new(&self.root)
-    }
-
-    /// Returns an iterator of all entries in the tree, with a special
-    /// filesystem handling for the directories containing said entries. See
-    /// the documentation of `FsIter` for more.
-    pub fn fs_iter(&self, root_dir: PathBuf) -> FsIter {
-        FsIter::new(&self.root, root_dir)
-    }
-
-    /// Remove the entry at `path` and returns it, if it exists.
-    pub fn remove(
-        &mut self,
-        path: impl AsRef<HgPath>,
-    ) -> Option<DirstateEntry> {
-        let RemoveResult { old_entry, .. } =
-            self.root.remove(path.as_ref().as_bytes());
-        self.files_count = self
-            .files_count
-            .checked_sub(if old_entry.is_some() { 1 } else { 0 })
-            .expect("removed too many files");
-        old_entry
-    }
-}
-
-impl<P: AsRef<HgPath>> Extend<(P, DirstateEntry)> for Tree {
-    fn extend<T: IntoIterator<Item = (P, DirstateEntry)>>(&mut self, iter: T) {
-        for (path, entry) in iter {
-            self.insert(path, entry);
-        }
-    }
-}
-
-impl<'a> IntoIterator for &'a Tree {
-    type Item = (HgPathBuf, DirstateEntry);
-    type IntoIter = Iter<'a>;
-
-    fn into_iter(self) -> Self::IntoIter {
-        self.iter()
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::dirstate::dirstate_tree::node::File;
-    use crate::{EntryState, FastHashMap};
-    use pretty_assertions::assert_eq;
-
-    impl Node {
-        /// Shortcut for getting children of a node in tests.
-        fn children(&self) -> Option<&FastHashMap<Vec<u8>, Node>> {
-            match &self.kind {
-                NodeKind::Directory(d) => Some(&d.children),
-                NodeKind::File(_) => None,
-            }
-        }
-    }
-
-    #[test]
-    fn test_dirstate_tree() {
-        let mut tree = Tree::new();
-
-        assert_eq!(
-            tree.insert_node(
-                HgPath::new(b"we/p"),
-                DirstateEntry {
-                    state: EntryState::Normal,
-                    mode: 0,
-                    mtime: 0,
-                    size: 0
-                }
-            ),
-            None
-        );
-        dbg!(&tree);
-        assert!(tree.get_node(HgPath::new(b"we")).is_some());
-        let entry = DirstateEntry {
-            state: EntryState::Merged,
-            mode: 41,
-            mtime: 42,
-            size: 43,
-        };
-        assert_eq!(tree.insert_node(HgPath::new(b"foo/bar"), entry), None);
-        assert_eq!(
-            tree.get_node(HgPath::new(b"foo/bar")),
-            Some(&Node {
-                kind: NodeKind::File(File {
-                    was_directory: None,
-                    entry
-                })
-            })
-        );
-        // We didn't override the first entry we made
-        assert!(tree.get_node(HgPath::new(b"we")).is_some(),);
-        // Inserting the same key again
-        assert_eq!(
-            tree.insert_node(HgPath::new(b"foo/bar"), entry),
-            Some(Node {
-                kind: NodeKind::File(File {
-                    was_directory: None,
-                    entry
-                }),
-            })
-        );
-        // Inserting the two levels deep
-        assert_eq!(tree.insert_node(HgPath::new(b"foo/bar/baz"), entry), None);
-        // Getting a file "inside a file" should return `None`
-        assert_eq!(tree.get_node(HgPath::new(b"foo/bar/baz/bap"),), None);
-
-        assert_eq!(
-            tree.insert_node(HgPath::new(b"wasdir/subfile"), entry),
-            None,
-        );
-        let removed_entry = DirstateEntry {
-            state: EntryState::Removed,
-            mode: 0,
-            mtime: 0,
-            size: 0,
-        };
-        assert!(tree
-            .insert_node(HgPath::new(b"wasdir"), removed_entry)
-            .is_some());
-
-        assert_eq!(
-            tree.get_node(HgPath::new(b"wasdir")),
-            Some(&Node {
-                kind: NodeKind::File(File {
-                    was_directory: Some(Box::new(Directory {
-                        was_file: None,
-                        children: [(
-                            b"subfile".to_vec(),
-                            Node {
-                                kind: NodeKind::File(File {
-                                    was_directory: None,
-                                    entry,
-                                })
-                            }
-                        )]
-                        .to_vec()
-                        .into_iter()
-                        .collect()
-                    })),
-                    entry: removed_entry
-                })
-            })
-        );
-
-        assert!(tree.get(HgPath::new(b"wasdir/subfile")).is_some())
-    }
-
-    #[test]
-    fn test_insert_removed() {
-        let mut tree = Tree::new();
-        let entry = DirstateEntry {
-            state: EntryState::Merged,
-            mode: 1,
-            mtime: 2,
-            size: 3,
-        };
-        let removed_entry = DirstateEntry {
-            state: EntryState::Removed,
-            mode: 10,
-            mtime: 20,
-            size: 30,
-        };
-        assert_eq!(tree.insert_node(HgPath::new(b"foo"), entry), None);
-        assert_eq!(
-            tree.insert_node(HgPath::new(b"foo/a"), removed_entry),
-            None
-        );
-        // The insert should not turn `foo` into a directory as `foo` is not
-        // `Removed`.
-        match tree.get_node(HgPath::new(b"foo")).unwrap().kind {
-            NodeKind::Directory(_) => panic!("should be a file"),
-            NodeKind::File(_) => {}
-        }
-
-        let mut tree = Tree::new();
-        let entry = DirstateEntry {
-            state: EntryState::Merged,
-            mode: 1,
-            mtime: 2,
-            size: 3,
-        };
-        let removed_entry = DirstateEntry {
-            state: EntryState::Removed,
-            mode: 10,
-            mtime: 20,
-            size: 30,
-        };
-        // The insert *should* turn `foo` into a directory as it is `Removed`.
-        assert_eq!(tree.insert_node(HgPath::new(b"foo"), removed_entry), None);
-        assert_eq!(tree.insert_node(HgPath::new(b"foo/a"), entry), None);
-        match tree.get_node(HgPath::new(b"foo")).unwrap().kind {
-            NodeKind::Directory(_) => {}
-            NodeKind::File(_) => panic!("should be a directory"),
-        }
-    }
-
-    #[test]
-    fn test_get() {
-        let mut tree = Tree::new();
-        let entry = DirstateEntry {
-            state: EntryState::Merged,
-            mode: 1,
-            mtime: 2,
-            size: 3,
-        };
-        assert_eq!(tree.insert_node(HgPath::new(b"a/b/c"), entry), None);
-        assert_eq!(tree.files_count, 1);
-        assert_eq!(tree.get(HgPath::new(b"a/b/c")), Some(&entry));
-        assert_eq!(tree.get(HgPath::new(b"a/b")), None);
-        assert_eq!(tree.get(HgPath::new(b"a")), None);
-        assert_eq!(tree.get(HgPath::new(b"a/b/c/d")), None);
-        let entry2 = DirstateEntry {
-            state: EntryState::Removed,
-            mode: 0,
-            mtime: 5,
-            size: 1,
-        };
-        // was_directory
-        assert_eq!(tree.insert(HgPath::new(b"a/b"), entry2), None);
-        assert_eq!(tree.files_count, 2);
-        assert_eq!(tree.get(HgPath::new(b"a/b")), Some(&entry2));
-        assert_eq!(tree.get(HgPath::new(b"a/b/c")), Some(&entry));
-
-        let mut tree = Tree::new();
-
-        // was_file
-        assert_eq!(tree.insert_node(HgPath::new(b"a"), entry), None);
-        assert_eq!(tree.files_count, 1);
-        assert_eq!(tree.insert_node(HgPath::new(b"a/b"), entry2), None);
-        assert_eq!(tree.files_count, 2);
-        assert_eq!(tree.get(HgPath::new(b"a/b")), Some(&entry2));
-    }
-
-    #[test]
-    fn test_get_mut() {
-        let mut tree = Tree::new();
-        let mut entry = DirstateEntry {
-            state: EntryState::Merged,
-            mode: 1,
-            mtime: 2,
-            size: 3,
-        };
-        assert_eq!(tree.insert_node(HgPath::new(b"a/b/c"), entry), None);
-        assert_eq!(tree.files_count, 1);
-        assert_eq!(tree.get_mut(HgPath::new(b"a/b/c")), Some(&mut entry));
-        assert_eq!(tree.get_mut(HgPath::new(b"a/b")), None);
-        assert_eq!(tree.get_mut(HgPath::new(b"a")), None);
-        assert_eq!(tree.get_mut(HgPath::new(b"a/b/c/d")), None);
-        let mut entry2 = DirstateEntry {
-            state: EntryState::Removed,
-            mode: 0,
-            mtime: 5,
-            size: 1,
-        };
-        // was_directory
-        assert_eq!(tree.insert(HgPath::new(b"a/b"), entry2), None);
-        assert_eq!(tree.files_count, 2);
-        assert_eq!(tree.get_mut(HgPath::new(b"a/b")), Some(&mut entry2));
-        assert_eq!(tree.get_mut(HgPath::new(b"a/b/c")), Some(&mut entry));
-
-        let mut tree = Tree::new();
-
-        // was_file
-        assert_eq!(tree.insert_node(HgPath::new(b"a"), entry), None);
-        assert_eq!(tree.files_count, 1);
-        assert_eq!(tree.insert_node(HgPath::new(b"a/b"), entry2), None);
-        assert_eq!(tree.files_count, 2);
-        assert_eq!(tree.get_mut(HgPath::new(b"a/b")), Some(&mut entry2));
-    }
-
-    #[test]
-    fn test_remove() {
-        let mut tree = Tree::new();
-        assert_eq!(tree.files_count, 0);
-        assert_eq!(tree.remove(HgPath::new(b"foo")), None);
-        assert_eq!(tree.files_count, 0);
-
-        let entry = DirstateEntry {
-            state: EntryState::Normal,
-            mode: 0,
-            mtime: 0,
-            size: 0,
-        };
-        assert_eq!(tree.insert_node(HgPath::new(b"a/b/c"), entry), None);
-        assert_eq!(tree.files_count, 1);
-
-        assert_eq!(tree.remove(HgPath::new(b"a/b/c")), Some(entry));
-        assert_eq!(tree.files_count, 0);
-
-        assert_eq!(tree.insert_node(HgPath::new(b"a/b/x"), entry), None);
-        assert_eq!(tree.insert_node(HgPath::new(b"a/b/y"), entry), None);
-        assert_eq!(tree.insert_node(HgPath::new(b"a/b/z"), entry), None);
-        assert_eq!(tree.insert_node(HgPath::new(b"x"), entry), None);
-        assert_eq!(tree.insert_node(HgPath::new(b"y"), entry), None);
-        assert_eq!(tree.files_count, 5);
-
-        assert_eq!(tree.remove(HgPath::new(b"a/b/x")), Some(entry));
-        assert_eq!(tree.files_count, 4);
-        assert_eq!(tree.remove(HgPath::new(b"a/b/x")), None);
-        assert_eq!(tree.files_count, 4);
-        assert_eq!(tree.remove(HgPath::new(b"a/b/y")), Some(entry));
-        assert_eq!(tree.files_count, 3);
-        assert_eq!(tree.remove(HgPath::new(b"a/b/z")), Some(entry));
-        assert_eq!(tree.files_count, 2);
-
-        assert_eq!(tree.remove(HgPath::new(b"x")), Some(entry));
-        assert_eq!(tree.files_count, 1);
-        assert_eq!(tree.remove(HgPath::new(b"y")), Some(entry));
-        assert_eq!(tree.files_count, 0);
-
-        // `a` should have been cleaned up, no more files anywhere in its
-        // descendents
-        assert_eq!(tree.get_node(HgPath::new(b"a")), None);
-        assert_eq!(tree.root.children().unwrap().len(), 0);
-
-        let removed_entry = DirstateEntry {
-            state: EntryState::Removed,
-            ..entry
-        };
-        assert_eq!(tree.insert(HgPath::new(b"a"), removed_entry), None);
-        assert_eq!(tree.insert_node(HgPath::new(b"a/b/x"), entry), None);
-        assert_eq!(tree.files_count, 2);
-        dbg!(&tree);
-        assert_eq!(tree.remove(HgPath::new(b"a")), Some(removed_entry));
-        assert_eq!(tree.files_count, 1);
-        dbg!(&tree);
-        assert_eq!(tree.remove(HgPath::new(b"a/b/x")), Some(entry));
-        assert_eq!(tree.files_count, 0);
-
-        // The entire tree should have been cleaned up, no more files anywhere
-        // in its descendents
-        assert_eq!(tree.root.children().unwrap().len(), 0);
-
-        let removed_entry = DirstateEntry {
-            state: EntryState::Removed,
-            ..entry
-        };
-        assert_eq!(tree.insert(HgPath::new(b"a"), entry), None);
-        assert_eq!(
-            tree.insert_node(HgPath::new(b"a/b/x"), removed_entry),
-            None
-        );
-        assert_eq!(tree.files_count, 2);
-        dbg!(&tree);
-        assert_eq!(tree.remove(HgPath::new(b"a")), Some(entry));
-        assert_eq!(tree.files_count, 1);
-        dbg!(&tree);
-        assert_eq!(tree.remove(HgPath::new(b"a/b/x")), Some(removed_entry));
-        assert_eq!(tree.files_count, 0);
-
-        dbg!(&tree);
-        // The entire tree should have been cleaned up, no more files anywhere
-        // in its descendents
-        assert_eq!(tree.root.children().unwrap().len(), 0);
-
-        assert_eq!(tree.insert(HgPath::new(b"d"), entry), None);
-        assert_eq!(tree.insert(HgPath::new(b"d/d/d"), entry), None);
-        assert_eq!(tree.files_count, 2);
-
-        // Deleting the nested file should not delete the top directory as it
-        // used to be a file
-        assert_eq!(tree.remove(HgPath::new(b"d/d/d")), Some(entry));
-        assert_eq!(tree.files_count, 1);
-        assert!(tree.get_node(HgPath::new(b"d")).is_some());
-        assert!(tree.remove(HgPath::new(b"d")).is_some());
-        assert_eq!(tree.files_count, 0);
-
-        // Deleting the nested file should not delete the top file (other way
-        // around from the last case)
-        assert_eq!(tree.insert(HgPath::new(b"a/a"), entry), None);
-        assert_eq!(tree.files_count, 1);
-        assert_eq!(tree.insert(HgPath::new(b"a"), entry), None);
-        assert_eq!(tree.files_count, 2);
-        dbg!(&tree);
-        assert_eq!(tree.remove(HgPath::new(b"a/a")), Some(entry));
-        assert_eq!(tree.files_count, 1);
-        dbg!(&tree);
-        assert!(tree.get_node(HgPath::new(b"a")).is_some());
-        assert!(tree.get_node(HgPath::new(b"a/a")).is_none());
-    }
-
-    #[test]
-    fn test_was_directory() {
-        let mut tree = Tree::new();
-
-        let entry = DirstateEntry {
-            state: EntryState::Removed,
-            mode: 0,
-            mtime: 0,
-            size: 0,
-        };
-        assert_eq!(tree.insert_node(HgPath::new(b"a/b/c"), entry), None);
-        assert_eq!(tree.files_count, 1);
-
-        assert!(tree.insert_node(HgPath::new(b"a"), entry).is_some());
-        let new_a = tree.root.children().unwrap().get(&b"a".to_vec()).unwrap();
-
-        match &new_a.kind {
-            NodeKind::Directory(_) => panic!(),
-            NodeKind::File(f) => {
-                let dir = f.was_directory.clone().unwrap();
-                let c = dir
-                    .children
-                    .get(&b"b".to_vec())
-                    .unwrap()
-                    .children()
-                    .unwrap()
-                    .get(&b"c".to_vec())
-                    .unwrap();
-
-                assert_eq!(
-                    match &c.kind {
-                        NodeKind::Directory(_) => panic!(),
-                        NodeKind::File(f) => f.entry,
-                    },
-                    entry
-                );
-            }
-        }
-        assert_eq!(tree.files_count, 2);
-        dbg!(&tree);
-        assert_eq!(tree.remove(HgPath::new(b"a/b/c")), Some(entry));
-        assert_eq!(tree.files_count, 1);
-        dbg!(&tree);
-        let a = tree.get_node(HgPath::new(b"a")).unwrap();
-        match &a.kind {
-            NodeKind::Directory(_) => panic!(),
-            NodeKind::File(f) => {
-                // Directory in `was_directory` was emptied, should be removed
-                assert_eq!(f.was_directory, None);
-            }
-        }
-    }
-    #[test]
-    fn test_extend() {
-        let insertions = [
-            (
-                HgPathBuf::from_bytes(b"d"),
-                DirstateEntry {
-                    state: EntryState::Added,
-                    mode: 0,
-                    mtime: -1,
-                    size: -1,
-                },
-            ),
-            (
-                HgPathBuf::from_bytes(b"b"),
-                DirstateEntry {
-                    state: EntryState::Normal,
-                    mode: 33188,
-                    mtime: 1599647984,
-                    size: 2,
-                },
-            ),
-            (
-                HgPathBuf::from_bytes(b"a/a"),
-                DirstateEntry {
-                    state: EntryState::Normal,
-                    mode: 33188,
-                    mtime: 1599647984,
-                    size: 2,
-                },
-            ),
-            (
-                HgPathBuf::from_bytes(b"d/d/d"),
-                DirstateEntry {
-                    state: EntryState::Removed,
-                    mode: 0,
-                    mtime: 0,
-                    size: 0,
-                },
-            ),
-        ]
-        .to_vec();
-        let mut tree = Tree::new();
-
-        tree.extend(insertions.clone().into_iter());
-
-        for (path, _) in &insertions {
-            assert!(tree.contains_key(path), true);
-        }
-        assert_eq!(tree.files_count, 4);
-    }
-}