rust-dirstate: add "dirs" Rust implementation
authorRaphaël Gomès <rgomes@octobus.net>
Thu, 16 May 2019 18:03:06 +0200
changeset 42536 2dcee6497b0b
parent 42535 df5f674050b7
child 42537 ce94f9622acd
rust-dirstate: add "dirs" Rust implementation Following the work done in d1786c1d34fa and working towards the goal of a complete Rust implementation of the dirstate, this rewrites the `dirs` class. There is already a C implementation, which relies heavily on CPython hacks and protocol violations for performance, so I don't expect this to perform as well for now, as this is very straight-forward code. The immediate benefits are new high-level documentation and some unit tests. Differential Revision: https://phab.mercurial-scm.org/D6393
rust/hg-core/src/dirstate/dirs_multiset.rs
rust/hg-core/src/dirstate/mod.rs
rust/hg-core/src/lib.rs
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-core/src/dirstate/dirs_multiset.rs	Thu May 16 18:03:06 2019 +0200
@@ -0,0 +1,355 @@
+// dirs_multiset.rs
+//
+// Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+//! A multiset of directory names.
+//!
+//! Used to counts the references to directories in a manifest or dirstate.
+use std::collections::hash_map::Entry;
+use std::collections::HashMap;
+use std::ops::Deref;
+use {DirsIterable, DirstateEntry, DirstateMapError};
+
+#[derive(PartialEq, Debug)]
+pub struct DirsMultiset {
+    inner: HashMap<Vec<u8>, u32>,
+}
+
+impl Deref for DirsMultiset {
+    type Target = HashMap<Vec<u8>, u32>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.inner
+    }
+}
+
+impl DirsMultiset {
+    /// Initializes the multiset from a dirstate or a manifest.
+    ///
+    /// If `skip_state` is provided, skips dirstate entries with equal state.
+    pub fn new(iterable: DirsIterable, skip_state: Option<i8>) -> Self {
+        let mut multiset = DirsMultiset {
+            inner: HashMap::new(),
+        };
+
+        match iterable {
+            DirsIterable::Dirstate(vec) => {
+                for (ref filename, DirstateEntry { state, .. }) in vec {
+                    // This `if` is optimized out of the loop
+                    if let Some(skip) = skip_state {
+                        if skip != state {
+                            multiset.add_path(filename);
+                        }
+                    } else {
+                        multiset.add_path(filename);
+                    }
+                }
+            }
+            DirsIterable::Manifest(vec) => {
+                for ref filename in vec {
+                    multiset.add_path(filename);
+                }
+            }
+        }
+
+        multiset
+    }
+
+    /// Returns the slice up to the next directory name from right to left,
+    /// without trailing slash
+    fn find_dir(path: &[u8]) -> &[u8] {
+        let mut path = path;
+        loop {
+            if let Some(new_pos) = path.len().checked_sub(1) {
+                if path[new_pos] == b'/' {
+                    break &path[..new_pos];
+                }
+                path = &path[..new_pos];
+            } else {
+                break &[];
+            }
+        }
+    }
+
+    /// Increases the count of deepest directory contained in the path.
+    ///
+    /// If the directory is not yet in the map, adds its parents.
+    pub fn add_path(&mut self, path: &[u8]) {
+        let mut pos = path.len();
+
+        loop {
+            let subpath = Self::find_dir(&path[..pos]);
+            if let Some(val) = self.inner.get_mut(subpath) {
+                *val += 1;
+                break;
+            }
+            self.inner.insert(subpath.to_owned(), 1);
+
+            pos = subpath.len();
+            if pos == 0 {
+                break;
+            }
+        }
+    }
+
+    /// Decreases the count of deepest directory contained in the path.
+    ///
+    /// If it is the only reference, decreases all parents until one is
+    /// removed.
+    /// If the directory is not in the map, something horrible has happened.
+    pub fn delete_path(
+        &mut self,
+        path: &[u8],
+    ) -> Result<(), DirstateMapError> {
+        let mut pos = path.len();
+
+        loop {
+            let subpath = Self::find_dir(&path[..pos]);
+            match self.inner.entry(subpath.to_owned()) {
+                Entry::Occupied(mut entry) => {
+                    let val = entry.get().clone();
+                    if val > 1 {
+                        entry.insert(val - 1);
+                        break;
+                    }
+                    entry.remove();
+                }
+                Entry::Vacant(_) => {
+                    return Err(DirstateMapError::PathNotFound(path.to_owned()))
+                }
+            };
+
+            pos = subpath.len();
+            if pos == 0 {
+                break;
+            }
+        }
+
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_delete_path_path_not_found() {
+        let mut map = DirsMultiset::new(DirsIterable::Manifest(vec![]), None);
+        let path = b"doesnotexist/";
+        assert_eq!(
+            Err(DirstateMapError::PathNotFound(path.to_vec())),
+            map.delete_path(path)
+        );
+    }
+
+    #[test]
+    fn test_delete_path_empty_path() {
+        let mut map =
+            DirsMultiset::new(DirsIterable::Manifest(vec![vec![]]), None);
+        let path = b"";
+        assert_eq!(Ok(()), map.delete_path(path));
+        assert_eq!(
+            Err(DirstateMapError::PathNotFound(path.to_vec())),
+            map.delete_path(path)
+        );
+    }
+
+    #[test]
+    fn test_delete_path_successful() {
+        let mut map = DirsMultiset {
+            inner: [("", 5), ("a", 3), ("a/b", 2), ("a/c", 1)]
+                .iter()
+                .map(|(k, v)| (k.as_bytes().to_vec(), *v))
+                .collect(),
+        };
+
+        assert_eq!(Ok(()), map.delete_path(b"a/b/"));
+        assert_eq!(Ok(()), map.delete_path(b"a/b/"));
+        assert_eq!(
+            Err(DirstateMapError::PathNotFound(b"a/b/".to_vec())),
+            map.delete_path(b"a/b/")
+        );
+
+        assert_eq!(2, *map.get(&b"a".to_vec()).unwrap());
+        assert_eq!(1, *map.get(&b"a/c".to_vec()).unwrap());
+        eprintln!("{:?}", map);
+        assert_eq!(Ok(()), map.delete_path(b"a/"));
+        eprintln!("{:?}", map);
+
+        assert_eq!(Ok(()), map.delete_path(b"a/c/"));
+        assert_eq!(
+            Err(DirstateMapError::PathNotFound(b"a/c/".to_vec())),
+            map.delete_path(b"a/c/")
+        );
+    }
+
+    #[test]
+    fn test_add_path_empty_path() {
+        let mut map = DirsMultiset::new(DirsIterable::Manifest(vec![]), None);
+        let path = b"";
+        map.add_path(path);
+
+        assert_eq!(1, map.len());
+    }
+
+    #[test]
+    fn test_add_path_successful() {
+        let mut map = DirsMultiset::new(DirsIterable::Manifest(vec![]), None);
+
+        map.add_path(b"a/");
+        assert_eq!(1, *map.get(&b"a".to_vec()).unwrap());
+        assert_eq!(1, *map.get(&Vec::new()).unwrap());
+        assert_eq!(2, map.len());
+
+        // Non directory should be ignored
+        map.add_path(b"a");
+        assert_eq!(1, *map.get(&b"a".to_vec()).unwrap());
+        assert_eq!(2, map.len());
+
+        // Non directory will still add its base
+        map.add_path(b"a/b");
+        assert_eq!(2, *map.get(&b"a".to_vec()).unwrap());
+        assert_eq!(2, map.len());
+
+        // Duplicate path works
+        map.add_path(b"a/");
+        assert_eq!(3, *map.get(&b"a".to_vec()).unwrap());
+
+        // Nested dir adds to its base
+        map.add_path(b"a/b/");
+        assert_eq!(4, *map.get(&b"a".to_vec()).unwrap());
+        assert_eq!(1, *map.get(&b"a/b".to_vec()).unwrap());
+
+        // but not its base's base, because it already existed
+        map.add_path(b"a/b/c/");
+        assert_eq!(4, *map.get(&b"a".to_vec()).unwrap());
+        assert_eq!(2, *map.get(&b"a/b".to_vec()).unwrap());
+
+        map.add_path(b"a/c/");
+        assert_eq!(1, *map.get(&b"a/c".to_vec()).unwrap());
+
+        let expected = DirsMultiset {
+            inner: [("", 2), ("a", 5), ("a/b", 2), ("a/b/c", 1), ("a/c", 1)]
+                .iter()
+                .map(|(k, v)| (k.as_bytes().to_vec(), *v))
+                .collect(),
+        };
+        assert_eq!(map, expected);
+    }
+
+    #[test]
+    fn test_dirsmultiset_new_empty() {
+        use DirsIterable::{Dirstate, Manifest};
+
+        let new = DirsMultiset::new(Manifest(vec![]), None);
+        let expected = DirsMultiset {
+            inner: HashMap::new(),
+        };
+        assert_eq!(expected, new);
+
+        let new = DirsMultiset::new(Dirstate(vec![]), None);
+        let expected = DirsMultiset {
+            inner: HashMap::new(),
+        };
+        assert_eq!(expected, new);
+    }
+
+    #[test]
+    fn test_dirsmultiset_new_no_skip() {
+        use DirsIterable::{Dirstate, Manifest};
+
+        let input_vec = ["a/", "b/", "a/c", "a/d/"]
+            .iter()
+            .map(|e| e.as_bytes().to_vec())
+            .collect();
+        let expected_inner = [("", 2), ("a", 3), ("b", 1), ("a/d", 1)]
+            .iter()
+            .map(|(k, v)| (k.as_bytes().to_vec(), *v))
+            .collect();
+
+        let new = DirsMultiset::new(Manifest(input_vec), None);
+        let expected = DirsMultiset {
+            inner: expected_inner,
+        };
+        assert_eq!(expected, new);
+
+        let input_map = ["a/", "b/", "a/c", "a/d/"]
+            .iter()
+            .map(|f| {
+                (
+                    f.as_bytes().to_vec(),
+                    DirstateEntry {
+                        state: 0,
+                        mode: 0,
+                        mtime: 0,
+                        size: 0,
+                    },
+                )
+            })
+            .collect();
+        let expected_inner = [("", 2), ("a", 3), ("b", 1), ("a/d", 1)]
+            .iter()
+            .map(|(k, v)| (k.as_bytes().to_vec(), *v))
+            .collect();
+
+        let new = DirsMultiset::new(Dirstate(input_map), None);
+        let expected = DirsMultiset {
+            inner: expected_inner,
+        };
+        assert_eq!(expected, new);
+    }
+
+    #[test]
+    fn test_dirsmultiset_new_skip() {
+        use DirsIterable::{Dirstate, Manifest};
+
+        let input_vec = ["a/", "b/", "a/c", "a/d/"]
+            .iter()
+            .map(|e| e.as_bytes().to_vec())
+            .collect();
+        let expected_inner = [("", 2), ("a", 3), ("b", 1), ("a/d", 1)]
+            .iter()
+            .map(|(k, v)| (k.as_bytes().to_vec(), *v))
+            .collect();
+
+        let new = DirsMultiset::new(Manifest(input_vec), Some('n' as i8));
+        let expected = DirsMultiset {
+            inner: expected_inner,
+        };
+        // Skip does not affect a manifest
+        assert_eq!(expected, new);
+
+        let input_map =
+            [("a/", 'n'), ("a/b/", 'n'), ("a/c", 'r'), ("a/d/", 'm')]
+                .iter()
+                .map(|(f, state)| {
+                    (
+                        f.as_bytes().to_vec(),
+                        DirstateEntry {
+                            state: *state as i8,
+                            mode: 0,
+                            mtime: 0,
+                            size: 0,
+                        },
+                    )
+                })
+                .collect();
+
+        // "a" incremented with "a/c" and "a/d/"
+        let expected_inner = [("", 1), ("a", 2), ("a/d", 1)]
+            .iter()
+            .map(|(k, v)| (k.as_bytes().to_vec(), *v))
+            .collect();
+
+        let new = DirsMultiset::new(Dirstate(input_map), Some('n' as i8));
+        let expected = DirsMultiset {
+            inner: expected_inner,
+        };
+        assert_eq!(expected, new);
+    }
+
+}
--- a/rust/hg-core/src/dirstate/mod.rs	Fri Jun 21 00:26:07 2019 +0530
+++ b/rust/hg-core/src/dirstate/mod.rs	Thu May 16 18:03:06 2019 +0200
@@ -1,3 +1,4 @@
+pub mod dirs_multiset;
 pub mod parsers;
 
 #[derive(Debug, PartialEq, Copy, Clone)]
@@ -26,3 +27,10 @@
 }
 
 pub type CopyVec<'a> = Vec<CopyVecEntry<'a>>;
+
+/// The Python implementation passes either a mapping (dirstate) or a flat
+/// iterable (manifest)
+pub enum DirsIterable {
+    Dirstate(DirstateVec),
+    Manifest(Vec<Vec<u8>>),
+}
--- a/rust/hg-core/src/lib.rs	Fri Jun 21 00:26:07 2019 +0530
+++ b/rust/hg-core/src/lib.rs	Thu May 16 18:03:06 2019 +0200
@@ -15,8 +15,10 @@
 pub mod discovery;
 pub mod testing; // unconditionally built, for use from integration tests
 pub use dirstate::{
+    dirs_multiset::DirsMultiset,
     parsers::{pack_dirstate, parse_dirstate},
-    CopyVec, CopyVecEntry, DirstateEntry, DirstateParents, DirstateVec,
+    CopyVec, CopyVecEntry, DirsIterable, DirstateEntry, DirstateParents,
+    DirstateVec,
 };
 mod filepatterns;
 mod utils;
@@ -73,6 +75,12 @@
     BadSize(usize, usize),
 }
 
+#[derive(Debug, PartialEq)]
+pub enum DirstateMapError {
+    PathNotFound(Vec<u8>),
+    EmptyPath,
+}
+
 impl From<std::io::Error> for DirstatePackError {
     fn from(e: std::io::Error) -> Self {
         DirstatePackError::CorruptedEntry(e.to_string())