rust-dirs-multiset: add `DirsChildrenMultiset`
authorRaphaël Gomès <rgomes@octobus.net>
Tue, 14 Jan 2020 17:04:32 +0100
changeset 44267 0e9ac3968b56
parent 44266 9ab4830e9e3d
child 44268 aa0fc32ece9e
rust-dirs-multiset: add `DirsChildrenMultiset` In a future patch, this structure will be needed to store information needed by the (also upcoming) `IgnoreMatcher`. Differential Revision: https://phab.mercurial-scm.org/D7869
rust/hg-core/src/dirstate/dirs_multiset.rs
rust/hg-core/src/utils/files.rs
rust/hg-core/src/utils/hg_path.rs
--- a/rust/hg-core/src/dirstate/dirs_multiset.rs	Tue Jan 14 16:50:35 2020 +0100
+++ b/rust/hg-core/src/dirstate/dirs_multiset.rs	Tue Jan 14 17:04:32 2020 +0100
@@ -8,12 +8,15 @@
 //! A multiset of directory names.
 //!
 //! Used to counts the references to directories in a manifest or dirstate.
-use crate::utils::hg_path::{HgPath, HgPathBuf};
 use crate::{
-    dirstate::EntryState, utils::files, DirstateEntry, DirstateMapError,
-    FastHashMap,
+    dirstate::EntryState,
+    utils::{
+        files,
+        hg_path::{HgPath, HgPathBuf},
+    },
+    DirstateEntry, DirstateMapError, FastHashMap,
 };
-use std::collections::hash_map::{self, Entry};
+use std::collections::{hash_map, hash_map::Entry, HashMap, HashSet};
 
 // could be encapsulated if we care API stability more seriously
 pub type DirsMultisetIter<'a> = hash_map::Keys<'a, HgPathBuf, u32>;
@@ -129,6 +132,68 @@
     }
 }
 
+/// This is basically a reimplementation of `DirsMultiset` that stores the
+/// children instead of just a count of them, plus a small optional
+/// optimization to avoid some directories we don't need.
+#[derive(PartialEq, Debug)]
+pub struct DirsChildrenMultiset<'a> {
+    inner: FastHashMap<&'a HgPath, HashSet<&'a HgPath>>,
+    only_include: Option<HashSet<&'a HgPath>>,
+}
+
+impl<'a> DirsChildrenMultiset<'a> {
+    pub fn new(
+        paths: impl Iterator<Item = &'a HgPathBuf>,
+        only_include: Option<&'a HashSet<impl AsRef<HgPath> + 'a>>,
+    ) -> Self {
+        let mut new = Self {
+            inner: HashMap::default(),
+            only_include: only_include
+                .map(|s| s.iter().map(|p| p.as_ref()).collect()),
+        };
+
+        for path in paths {
+            new.add_path(path)
+        }
+
+        new
+    }
+    fn add_path(&mut self, path: &'a (impl AsRef<HgPath> + 'a)) {
+        if path.as_ref().is_empty() {
+            return;
+        }
+        for (directory, basename) in files::find_dirs_with_base(path.as_ref())
+        {
+            if !self.is_dir_included(directory) {
+                continue;
+            }
+            self.inner
+                .entry(directory)
+                .and_modify(|e| {
+                    e.insert(basename);
+                })
+                .or_insert_with(|| {
+                    let mut set = HashSet::new();
+                    set.insert(basename);
+                    set
+                });
+        }
+    }
+    fn is_dir_included(&self, dir: impl AsRef<HgPath>) -> bool {
+        match &self.only_include {
+            None => false,
+            Some(i) => i.contains(dir.as_ref()),
+        }
+    }
+
+    pub fn get(
+        &self,
+        path: impl AsRef<HgPath>,
+    ) -> Option<&HashSet<&'a HgPath>> {
+        self.inner.get(path.as_ref())
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
--- a/rust/hg-core/src/utils/files.rs	Tue Jan 14 16:50:35 2020 +0100
+++ b/rust/hg-core/src/utils/files.rs	Tue Jan 14 17:04:32 2020 +0100
@@ -10,11 +10,11 @@
 //! Functions for fiddling with files.
 
 use crate::utils::hg_path::{HgPath, HgPathBuf};
-use std::iter::FusedIterator;
 
 use crate::utils::replace_slice;
 use lazy_static::lazy_static;
 use std::fs::Metadata;
+use std::iter::FusedIterator;
 use std::path::Path;
 
 pub fn get_path_from_bytes(bytes: &[u8]) -> &Path {
@@ -64,6 +64,28 @@
 
 impl<'a> FusedIterator for Ancestors<'a> {}
 
+/// An iterator over repository path yielding itself and its ancestors.
+#[derive(Copy, Clone, Debug)]
+pub(crate) struct AncestorsWithBase<'a> {
+    next: Option<(&'a HgPath, &'a HgPath)>,
+}
+
+impl<'a> Iterator for AncestorsWithBase<'a> {
+    type Item = (&'a HgPath, &'a HgPath);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let next = self.next;
+        self.next = match self.next {
+            Some((s, _)) if s.is_empty() => None,
+            Some((s, _)) => Some(s.split_filename()),
+            None => None,
+        };
+        next
+    }
+}
+
+impl<'a> FusedIterator for AncestorsWithBase<'a> {}
+
 /// Returns an iterator yielding ancestor directories of the given repository
 /// path.
 ///
@@ -79,6 +101,25 @@
     dirs
 }
 
+/// Returns an iterator yielding ancestor directories of the given repository
+/// path.
+///
+/// The path is separated by '/', and must not start with '/'.
+///
+/// The path itself isn't included unless it is b"" (meaning the root
+/// directory.)
+pub(crate) fn find_dirs_with_base<'a>(
+    path: &'a HgPath,
+) -> AncestorsWithBase<'a> {
+    let mut dirs = AncestorsWithBase {
+        next: Some((path, HgPath::new(b""))),
+    };
+    if !path.is_empty() {
+        dirs.next(); // skip itself
+    }
+    dirs
+}
+
 /// TODO more than ASCII?
 pub fn normalize_case(path: &HgPath) -> HgPathBuf {
     #[cfg(windows)] // NTFS compares via upper()
@@ -170,4 +211,28 @@
         assert_eq!(dirs.next(), None);
         assert_eq!(dirs.next(), None);
     }
+
+    #[test]
+    fn test_find_dirs_with_base_some() {
+        let mut dirs = super::find_dirs_with_base(HgPath::new(b"foo/bar/baz"));
+        assert_eq!(
+            dirs.next(),
+            Some((HgPath::new(b"foo/bar"), HgPath::new(b"baz")))
+        );
+        assert_eq!(
+            dirs.next(),
+            Some((HgPath::new(b"foo"), HgPath::new(b"bar")))
+        );
+        assert_eq!(dirs.next(), Some((HgPath::new(b""), HgPath::new(b"foo"))));
+        assert_eq!(dirs.next(), None);
+        assert_eq!(dirs.next(), None);
+    }
+
+    #[test]
+    fn test_find_dirs_with_base_empty() {
+        let mut dirs = super::find_dirs_with_base(HgPath::new(b""));
+        assert_eq!(dirs.next(), Some((HgPath::new(b""), HgPath::new(b""))));
+        assert_eq!(dirs.next(), None);
+        assert_eq!(dirs.next(), None);
+    }
 }
--- a/rust/hg-core/src/utils/hg_path.rs	Tue Jan 14 16:50:35 2020 +0100
+++ b/rust/hg-core/src/utils/hg_path.rs	Tue Jan 14 17:04:32 2020 +0100
@@ -183,6 +183,29 @@
             &self.inner[..]
         })
     }
+    /// Returns a tuple of slices `(base, filename)` resulting from the split
+    /// at the rightmost `/`, if any.
+    ///
+    /// # Examples:
+    ///
+    /// ```
+    /// use hg::utils::hg_path::HgPath;
+    ///
+    /// let path = HgPath::new(b"cool/hg/path").split_filename();
+    /// assert_eq!(path, (HgPath::new(b"cool/hg"), HgPath::new(b"path")));
+    ///
+    /// let path = HgPath::new(b"pathwithoutsep").split_filename();
+    /// assert_eq!(path, (HgPath::new(b""), HgPath::new(b"pathwithoutsep")));
+    /// ```
+    pub fn split_filename(&self) -> (&Self, &Self) {
+        match &self.inner.iter().rposition(|c| *c == b'/') {
+            None => (HgPath::new(""), &self),
+            Some(size) => (
+                HgPath::new(&self.inner[..*size]),
+                HgPath::new(&self.inner[*size + 1..]),
+            ),
+        }
+    }
     pub fn join<T: ?Sized + AsRef<Self>>(&self, other: &T) -> HgPathBuf {
         let mut inner = self.inner.to_owned();
         if inner.len() != 0 && inner.last() != Some(&b'/') {