diff -r 8759e22f1649 -r 441024b279a6 rust/hg-core/src/dirstate/dirstate_tree/tree.rs --- a/rust/hg-core/src/dirstate/dirstate_tree/tree.rs Sun Sep 13 22:14:25 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,682 +0,0 @@ -// tree.rs -// -// Copyright 2020, Raphaël Gomès -// -// This software may be used and distributed according to the terms of the -// GNU General Public License version 2 or any later version. - -use super::iter::Iter; -use super::node::{Directory, Node, NodeKind}; -use crate::dirstate::dirstate_tree::iter::FsIter; -use crate::dirstate::dirstate_tree::node::{InsertResult, RemoveResult}; -use crate::utils::hg_path::{HgPath, HgPathBuf}; -use crate::DirstateEntry; -use std::path::PathBuf; - -/// A specialized tree to represent the Mercurial dirstate. -/// -/// # Advantages over a flat structure -/// -/// The dirstate is inherently hierarchical, since it's a representation of the -/// file structure of the project. The current dirstate format is flat, and -/// while that affords us potentially great (unordered) iteration speeds, the -/// need to retrieve a given path is great enough that you need some kind of -/// hashmap or tree in a lot of cases anyway. -/// -/// Going with a tree allows us to be smarter: -/// - Skipping an ignored directory means we don't visit its entire subtree -/// - Security auditing does not need to reconstruct paths backwards to check -/// for symlinked directories, this can be done during the iteration in a -/// very efficient fashion -/// - We don't need to build the directory information in another struct, -/// simplifying the code a lot, reducing the memory footprint and -/// potentially going faster depending on the implementation. -/// - We can use it to store a (platform-dependent) caching mechanism [1] -/// - And probably other types of optimizations. -/// -/// Only the first two items in this list are implemented as of this commit. -/// -/// [1]: https://www.mercurial-scm.org/wiki/DirsCachePlan -/// -/// -/// # Structure -/// -/// It's a prefix (radix) tree with no fixed arity, with a granularity of a -/// folder, allowing it to mimic a filesystem hierarchy: -/// -/// ```text -/// foo/bar -/// foo/baz -/// test -/// ``` -/// Will be represented (simplified) by: -/// -/// ```text -/// Directory(root): -/// - File("test") -/// - Directory("foo"): -/// - File("bar") -/// - File("baz") -/// ``` -/// -/// Moreover, it is special-cased for storing the dirstate and as such handles -/// cases that a simple `HashMap` would handle, but while preserving the -/// hierarchy. -/// For example: -/// -/// ```shell -/// $ touch foo -/// $ hg add foo -/// $ hg commit -m "foo" -/// $ hg remove foo -/// $ rm foo -/// $ mkdir foo -/// $ touch foo/a -/// $ hg add foo/a -/// $ hg status -/// R foo -/// A foo/a -/// ``` -/// To represent this in a tree, one needs to keep track of whether any given -/// file was a directory and whether any given directory was a file at the last -/// dirstate update. This tree stores that information, but only in the right -/// circumstances by respecting the high-level rules that prevent nonsensical -/// structures to exist: -/// - a file can only be added as a child of another file if the latter is -/// marked as `Removed` -/// - a file cannot replace a folder unless all its descendents are removed -/// -/// This second rule is not checked by the tree for performance reasons, and -/// because high-level logic already prevents that state from happening. -/// -/// # Ordering -/// -/// It makes no guarantee of ordering for now. -#[derive(Debug, Default, Clone, PartialEq)] -pub struct Tree { - pub root: Node, - files_count: usize, -} - -impl Tree { - pub fn new() -> Self { - Self { - root: Node { - kind: NodeKind::Directory(Directory { - was_file: None, - children: Default::default(), - }), - }, - files_count: 0, - } - } - - /// How many files (not directories) are stored in the tree, including ones - /// marked as `Removed`. - pub fn len(&self) -> usize { - self.files_count - } - - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// Inserts a file in the tree and returns the previous entry if any. - pub fn insert( - &mut self, - path: impl AsRef, - kind: DirstateEntry, - ) -> Option { - let old = self.insert_node(path, kind); - match old?.kind { - NodeKind::Directory(_) => None, - NodeKind::File(f) => Some(f.entry), - } - } - - /// Low-level insertion method that returns the previous node (directories - /// included). - fn insert_node( - &mut self, - path: impl AsRef, - kind: DirstateEntry, - ) -> Option { - let InsertResult { - did_insert, - old_entry, - } = self.root.insert(path.as_ref().as_bytes(), kind); - self.files_count += if did_insert { 1 } else { 0 }; - old_entry - } - - /// Returns a reference to a node if it exists. - pub fn get_node(&self, path: impl AsRef) -> Option<&Node> { - self.root.get(path.as_ref().as_bytes()) - } - - /// Returns a reference to the entry corresponding to `path` if it exists. - pub fn get(&self, path: impl AsRef) -> Option<&DirstateEntry> { - if let Some(node) = self.get_node(&path) { - return match &node.kind { - NodeKind::Directory(d) => { - d.was_file.as_ref().map(|f| &f.entry) - } - NodeKind::File(f) => Some(&f.entry), - }; - } - None - } - - /// Returns `true` if an entry is found for the given `path`. - pub fn contains_key(&self, path: impl AsRef) -> bool { - self.get(path).is_some() - } - - /// Returns a mutable reference to the entry corresponding to `path` if it - /// exists. - pub fn get_mut( - &mut self, - path: impl AsRef, - ) -> Option<&mut DirstateEntry> { - if let Some(kind) = self.root.get_mut(path.as_ref().as_bytes()) { - return match kind { - NodeKind::Directory(d) => { - d.was_file.as_mut().map(|f| &mut f.entry) - } - NodeKind::File(f) => Some(&mut f.entry), - }; - } - None - } - - /// Returns an iterator over the paths and corresponding entries in the - /// tree. - pub fn iter(&self) -> Iter { - Iter::new(&self.root) - } - - /// Returns an iterator of all entries in the tree, with a special - /// filesystem handling for the directories containing said entries. See - /// the documentation of `FsIter` for more. - pub fn fs_iter(&self, root_dir: PathBuf) -> FsIter { - FsIter::new(&self.root, root_dir) - } - - /// Remove the entry at `path` and returns it, if it exists. - pub fn remove( - &mut self, - path: impl AsRef, - ) -> Option { - let RemoveResult { old_entry, .. } = - self.root.remove(path.as_ref().as_bytes()); - self.files_count = self - .files_count - .checked_sub(if old_entry.is_some() { 1 } else { 0 }) - .expect("removed too many files"); - old_entry - } -} - -impl> Extend<(P, DirstateEntry)> for Tree { - fn extend>(&mut self, iter: T) { - for (path, entry) in iter { - self.insert(path, entry); - } - } -} - -impl<'a> IntoIterator for &'a Tree { - type Item = (HgPathBuf, DirstateEntry); - type IntoIter = Iter<'a>; - - fn into_iter(self) -> Self::IntoIter { - self.iter() - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::dirstate::dirstate_tree::node::File; - use crate::{EntryState, FastHashMap}; - use pretty_assertions::assert_eq; - - impl Node { - /// Shortcut for getting children of a node in tests. - fn children(&self) -> Option<&FastHashMap, Node>> { - match &self.kind { - NodeKind::Directory(d) => Some(&d.children), - NodeKind::File(_) => None, - } - } - } - - #[test] - fn test_dirstate_tree() { - let mut tree = Tree::new(); - - assert_eq!( - tree.insert_node( - HgPath::new(b"we/p"), - DirstateEntry { - state: EntryState::Normal, - mode: 0, - mtime: 0, - size: 0 - } - ), - None - ); - dbg!(&tree); - assert!(tree.get_node(HgPath::new(b"we")).is_some()); - let entry = DirstateEntry { - state: EntryState::Merged, - mode: 41, - mtime: 42, - size: 43, - }; - assert_eq!(tree.insert_node(HgPath::new(b"foo/bar"), entry), None); - assert_eq!( - tree.get_node(HgPath::new(b"foo/bar")), - Some(&Node { - kind: NodeKind::File(File { - was_directory: None, - entry - }) - }) - ); - // We didn't override the first entry we made - assert!(tree.get_node(HgPath::new(b"we")).is_some(),); - // Inserting the same key again - assert_eq!( - tree.insert_node(HgPath::new(b"foo/bar"), entry), - Some(Node { - kind: NodeKind::File(File { - was_directory: None, - entry - }), - }) - ); - // Inserting the two levels deep - assert_eq!(tree.insert_node(HgPath::new(b"foo/bar/baz"), entry), None); - // Getting a file "inside a file" should return `None` - assert_eq!(tree.get_node(HgPath::new(b"foo/bar/baz/bap"),), None); - - assert_eq!( - tree.insert_node(HgPath::new(b"wasdir/subfile"), entry), - None, - ); - let removed_entry = DirstateEntry { - state: EntryState::Removed, - mode: 0, - mtime: 0, - size: 0, - }; - assert!(tree - .insert_node(HgPath::new(b"wasdir"), removed_entry) - .is_some()); - - assert_eq!( - tree.get_node(HgPath::new(b"wasdir")), - Some(&Node { - kind: NodeKind::File(File { - was_directory: Some(Box::new(Directory { - was_file: None, - children: [( - b"subfile".to_vec(), - Node { - kind: NodeKind::File(File { - was_directory: None, - entry, - }) - } - )] - .to_vec() - .into_iter() - .collect() - })), - entry: removed_entry - }) - }) - ); - - assert!(tree.get(HgPath::new(b"wasdir/subfile")).is_some()) - } - - #[test] - fn test_insert_removed() { - let mut tree = Tree::new(); - let entry = DirstateEntry { - state: EntryState::Merged, - mode: 1, - mtime: 2, - size: 3, - }; - let removed_entry = DirstateEntry { - state: EntryState::Removed, - mode: 10, - mtime: 20, - size: 30, - }; - assert_eq!(tree.insert_node(HgPath::new(b"foo"), entry), None); - assert_eq!( - tree.insert_node(HgPath::new(b"foo/a"), removed_entry), - None - ); - // The insert should not turn `foo` into a directory as `foo` is not - // `Removed`. - match tree.get_node(HgPath::new(b"foo")).unwrap().kind { - NodeKind::Directory(_) => panic!("should be a file"), - NodeKind::File(_) => {} - } - - let mut tree = Tree::new(); - let entry = DirstateEntry { - state: EntryState::Merged, - mode: 1, - mtime: 2, - size: 3, - }; - let removed_entry = DirstateEntry { - state: EntryState::Removed, - mode: 10, - mtime: 20, - size: 30, - }; - // The insert *should* turn `foo` into a directory as it is `Removed`. - assert_eq!(tree.insert_node(HgPath::new(b"foo"), removed_entry), None); - assert_eq!(tree.insert_node(HgPath::new(b"foo/a"), entry), None); - match tree.get_node(HgPath::new(b"foo")).unwrap().kind { - NodeKind::Directory(_) => {} - NodeKind::File(_) => panic!("should be a directory"), - } - } - - #[test] - fn test_get() { - let mut tree = Tree::new(); - let entry = DirstateEntry { - state: EntryState::Merged, - mode: 1, - mtime: 2, - size: 3, - }; - assert_eq!(tree.insert_node(HgPath::new(b"a/b/c"), entry), None); - assert_eq!(tree.files_count, 1); - assert_eq!(tree.get(HgPath::new(b"a/b/c")), Some(&entry)); - assert_eq!(tree.get(HgPath::new(b"a/b")), None); - assert_eq!(tree.get(HgPath::new(b"a")), None); - assert_eq!(tree.get(HgPath::new(b"a/b/c/d")), None); - let entry2 = DirstateEntry { - state: EntryState::Removed, - mode: 0, - mtime: 5, - size: 1, - }; - // was_directory - assert_eq!(tree.insert(HgPath::new(b"a/b"), entry2), None); - assert_eq!(tree.files_count, 2); - assert_eq!(tree.get(HgPath::new(b"a/b")), Some(&entry2)); - assert_eq!(tree.get(HgPath::new(b"a/b/c")), Some(&entry)); - - let mut tree = Tree::new(); - - // was_file - assert_eq!(tree.insert_node(HgPath::new(b"a"), entry), None); - assert_eq!(tree.files_count, 1); - assert_eq!(tree.insert_node(HgPath::new(b"a/b"), entry2), None); - assert_eq!(tree.files_count, 2); - assert_eq!(tree.get(HgPath::new(b"a/b")), Some(&entry2)); - } - - #[test] - fn test_get_mut() { - let mut tree = Tree::new(); - let mut entry = DirstateEntry { - state: EntryState::Merged, - mode: 1, - mtime: 2, - size: 3, - }; - assert_eq!(tree.insert_node(HgPath::new(b"a/b/c"), entry), None); - assert_eq!(tree.files_count, 1); - assert_eq!(tree.get_mut(HgPath::new(b"a/b/c")), Some(&mut entry)); - assert_eq!(tree.get_mut(HgPath::new(b"a/b")), None); - assert_eq!(tree.get_mut(HgPath::new(b"a")), None); - assert_eq!(tree.get_mut(HgPath::new(b"a/b/c/d")), None); - let mut entry2 = DirstateEntry { - state: EntryState::Removed, - mode: 0, - mtime: 5, - size: 1, - }; - // was_directory - assert_eq!(tree.insert(HgPath::new(b"a/b"), entry2), None); - assert_eq!(tree.files_count, 2); - assert_eq!(tree.get_mut(HgPath::new(b"a/b")), Some(&mut entry2)); - assert_eq!(tree.get_mut(HgPath::new(b"a/b/c")), Some(&mut entry)); - - let mut tree = Tree::new(); - - // was_file - assert_eq!(tree.insert_node(HgPath::new(b"a"), entry), None); - assert_eq!(tree.files_count, 1); - assert_eq!(tree.insert_node(HgPath::new(b"a/b"), entry2), None); - assert_eq!(tree.files_count, 2); - assert_eq!(tree.get_mut(HgPath::new(b"a/b")), Some(&mut entry2)); - } - - #[test] - fn test_remove() { - let mut tree = Tree::new(); - assert_eq!(tree.files_count, 0); - assert_eq!(tree.remove(HgPath::new(b"foo")), None); - assert_eq!(tree.files_count, 0); - - let entry = DirstateEntry { - state: EntryState::Normal, - mode: 0, - mtime: 0, - size: 0, - }; - assert_eq!(tree.insert_node(HgPath::new(b"a/b/c"), entry), None); - assert_eq!(tree.files_count, 1); - - assert_eq!(tree.remove(HgPath::new(b"a/b/c")), Some(entry)); - assert_eq!(tree.files_count, 0); - - assert_eq!(tree.insert_node(HgPath::new(b"a/b/x"), entry), None); - assert_eq!(tree.insert_node(HgPath::new(b"a/b/y"), entry), None); - assert_eq!(tree.insert_node(HgPath::new(b"a/b/z"), entry), None); - assert_eq!(tree.insert_node(HgPath::new(b"x"), entry), None); - assert_eq!(tree.insert_node(HgPath::new(b"y"), entry), None); - assert_eq!(tree.files_count, 5); - - assert_eq!(tree.remove(HgPath::new(b"a/b/x")), Some(entry)); - assert_eq!(tree.files_count, 4); - assert_eq!(tree.remove(HgPath::new(b"a/b/x")), None); - assert_eq!(tree.files_count, 4); - assert_eq!(tree.remove(HgPath::new(b"a/b/y")), Some(entry)); - assert_eq!(tree.files_count, 3); - assert_eq!(tree.remove(HgPath::new(b"a/b/z")), Some(entry)); - assert_eq!(tree.files_count, 2); - - assert_eq!(tree.remove(HgPath::new(b"x")), Some(entry)); - assert_eq!(tree.files_count, 1); - assert_eq!(tree.remove(HgPath::new(b"y")), Some(entry)); - assert_eq!(tree.files_count, 0); - - // `a` should have been cleaned up, no more files anywhere in its - // descendents - assert_eq!(tree.get_node(HgPath::new(b"a")), None); - assert_eq!(tree.root.children().unwrap().len(), 0); - - let removed_entry = DirstateEntry { - state: EntryState::Removed, - ..entry - }; - assert_eq!(tree.insert(HgPath::new(b"a"), removed_entry), None); - assert_eq!(tree.insert_node(HgPath::new(b"a/b/x"), entry), None); - assert_eq!(tree.files_count, 2); - dbg!(&tree); - assert_eq!(tree.remove(HgPath::new(b"a")), Some(removed_entry)); - assert_eq!(tree.files_count, 1); - dbg!(&tree); - assert_eq!(tree.remove(HgPath::new(b"a/b/x")), Some(entry)); - assert_eq!(tree.files_count, 0); - - // The entire tree should have been cleaned up, no more files anywhere - // in its descendents - assert_eq!(tree.root.children().unwrap().len(), 0); - - let removed_entry = DirstateEntry { - state: EntryState::Removed, - ..entry - }; - assert_eq!(tree.insert(HgPath::new(b"a"), entry), None); - assert_eq!( - tree.insert_node(HgPath::new(b"a/b/x"), removed_entry), - None - ); - assert_eq!(tree.files_count, 2); - dbg!(&tree); - assert_eq!(tree.remove(HgPath::new(b"a")), Some(entry)); - assert_eq!(tree.files_count, 1); - dbg!(&tree); - assert_eq!(tree.remove(HgPath::new(b"a/b/x")), Some(removed_entry)); - assert_eq!(tree.files_count, 0); - - dbg!(&tree); - // The entire tree should have been cleaned up, no more files anywhere - // in its descendents - assert_eq!(tree.root.children().unwrap().len(), 0); - - assert_eq!(tree.insert(HgPath::new(b"d"), entry), None); - assert_eq!(tree.insert(HgPath::new(b"d/d/d"), entry), None); - assert_eq!(tree.files_count, 2); - - // Deleting the nested file should not delete the top directory as it - // used to be a file - assert_eq!(tree.remove(HgPath::new(b"d/d/d")), Some(entry)); - assert_eq!(tree.files_count, 1); - assert!(tree.get_node(HgPath::new(b"d")).is_some()); - assert!(tree.remove(HgPath::new(b"d")).is_some()); - assert_eq!(tree.files_count, 0); - - // Deleting the nested file should not delete the top file (other way - // around from the last case) - assert_eq!(tree.insert(HgPath::new(b"a/a"), entry), None); - assert_eq!(tree.files_count, 1); - assert_eq!(tree.insert(HgPath::new(b"a"), entry), None); - assert_eq!(tree.files_count, 2); - dbg!(&tree); - assert_eq!(tree.remove(HgPath::new(b"a/a")), Some(entry)); - assert_eq!(tree.files_count, 1); - dbg!(&tree); - assert!(tree.get_node(HgPath::new(b"a")).is_some()); - assert!(tree.get_node(HgPath::new(b"a/a")).is_none()); - } - - #[test] - fn test_was_directory() { - let mut tree = Tree::new(); - - let entry = DirstateEntry { - state: EntryState::Removed, - mode: 0, - mtime: 0, - size: 0, - }; - assert_eq!(tree.insert_node(HgPath::new(b"a/b/c"), entry), None); - assert_eq!(tree.files_count, 1); - - assert!(tree.insert_node(HgPath::new(b"a"), entry).is_some()); - let new_a = tree.root.children().unwrap().get(&b"a".to_vec()).unwrap(); - - match &new_a.kind { - NodeKind::Directory(_) => panic!(), - NodeKind::File(f) => { - let dir = f.was_directory.clone().unwrap(); - let c = dir - .children - .get(&b"b".to_vec()) - .unwrap() - .children() - .unwrap() - .get(&b"c".to_vec()) - .unwrap(); - - assert_eq!( - match &c.kind { - NodeKind::Directory(_) => panic!(), - NodeKind::File(f) => f.entry, - }, - entry - ); - } - } - assert_eq!(tree.files_count, 2); - dbg!(&tree); - assert_eq!(tree.remove(HgPath::new(b"a/b/c")), Some(entry)); - assert_eq!(tree.files_count, 1); - dbg!(&tree); - let a = tree.get_node(HgPath::new(b"a")).unwrap(); - match &a.kind { - NodeKind::Directory(_) => panic!(), - NodeKind::File(f) => { - // Directory in `was_directory` was emptied, should be removed - assert_eq!(f.was_directory, None); - } - } - } - #[test] - fn test_extend() { - let insertions = [ - ( - HgPathBuf::from_bytes(b"d"), - DirstateEntry { - state: EntryState::Added, - mode: 0, - mtime: -1, - size: -1, - }, - ), - ( - HgPathBuf::from_bytes(b"b"), - DirstateEntry { - state: EntryState::Normal, - mode: 33188, - mtime: 1599647984, - size: 2, - }, - ), - ( - HgPathBuf::from_bytes(b"a/a"), - DirstateEntry { - state: EntryState::Normal, - mode: 33188, - mtime: 1599647984, - size: 2, - }, - ), - ( - HgPathBuf::from_bytes(b"d/d/d"), - DirstateEntry { - state: EntryState::Removed, - mode: 0, - mtime: 0, - size: 0, - }, - ), - ] - .to_vec(); - let mut tree = Tree::new(); - - tree.extend(insertions.clone().into_iter()); - - for (path, _) in &insertions { - assert!(tree.contains_key(path), true); - } - assert_eq!(tree.files_count, 4); - } -}