rust/hg-core/src/matchers.rs
changeset 51109 687e192dae16
parent 50865 f874342fa568
child 51120 532e74ad3ff6
equal deleted inserted replaced
51108:0250e45040f1 51109:687e192dae16
     4 //
     4 //
     5 // This software may be used and distributed according to the terms of the
     5 // This software may be used and distributed according to the terms of the
     6 // GNU General Public License version 2 or any later version.
     6 // GNU General Public License version 2 or any later version.
     7 
     7 
     8 //! Structs and types for matching files and directories.
     8 //! Structs and types for matching files and directories.
       
     9 
       
    10 use format_bytes::format_bytes;
       
    11 use once_cell::sync::OnceCell;
     9 
    12 
    10 use crate::{
    13 use crate::{
    11     dirstate::dirs_multiset::DirsChildrenMultiset,
    14     dirstate::dirs_multiset::DirsChildrenMultiset,
    12     filepatterns::{
    15     filepatterns::{
    13         build_single_regex, filter_subincludes, get_patterns_from_file,
    16         build_single_regex, filter_subincludes, get_patterns_from_file,
    21     DirsMultiset, FastHashMap, IgnorePattern, PatternError, PatternSyntax,
    24     DirsMultiset, FastHashMap, IgnorePattern, PatternError, PatternSyntax,
    22 };
    25 };
    23 
    26 
    24 use crate::dirstate::status::IgnoreFnType;
    27 use crate::dirstate::status::IgnoreFnType;
    25 use crate::filepatterns::normalize_path_bytes;
    28 use crate::filepatterns::normalize_path_bytes;
    26 use std::borrow::ToOwned;
       
    27 use std::collections::HashSet;
    29 use std::collections::HashSet;
    28 use std::fmt::{Display, Error, Formatter};
    30 use std::fmt::{Display, Error, Formatter};
    29 use std::ops::Deref;
    31 use std::ops::Deref;
    30 use std::path::{Path, PathBuf};
    32 use std::path::{Path, PathBuf};
       
    33 use std::{borrow::ToOwned, collections::BTreeSet};
    31 
    34 
    32 #[derive(Debug, PartialEq)]
    35 #[derive(Debug, PartialEq)]
    33 pub enum VisitChildrenSet {
    36 pub enum VisitChildrenSet {
    34     /// Don't visit anything
    37     /// Don't visit anything
    35     Empty,
    38     Empty,
   171 /// ```
   174 /// ```
   172 #[derive(Debug)]
   175 #[derive(Debug)]
   173 pub struct FileMatcher {
   176 pub struct FileMatcher {
   174     files: HashSet<HgPathBuf>,
   177     files: HashSet<HgPathBuf>,
   175     dirs: DirsMultiset,
   178     dirs: DirsMultiset,
       
   179     sorted_visitchildrenset_candidates: OnceCell<BTreeSet<HgPathBuf>>,
   176 }
   180 }
   177 
   181 
   178 impl FileMatcher {
   182 impl FileMatcher {
   179     pub fn new(files: Vec<HgPathBuf>) -> Result<Self, HgPathError> {
   183     pub fn new(files: Vec<HgPathBuf>) -> Result<Self, HgPathError> {
   180         let dirs = DirsMultiset::from_manifest(&files)?;
   184         let dirs = DirsMultiset::from_manifest(&files)?;
   181         Ok(Self {
   185         Ok(Self {
   182             files: HashSet::from_iter(files.into_iter()),
   186             files: HashSet::from_iter(files.into_iter()),
   183             dirs,
   187             dirs,
       
   188             sorted_visitchildrenset_candidates: OnceCell::new(),
   184         })
   189         })
   185     }
   190     }
   186     fn inner_matches(&self, filename: &HgPath) -> bool {
   191     fn inner_matches(&self, filename: &HgPath) -> bool {
   187         self.files.contains(filename.as_ref())
   192         self.files.contains(filename.as_ref())
   188     }
   193     }
   197     }
   202     }
   198     fn matches(&self, filename: &HgPath) -> bool {
   203     fn matches(&self, filename: &HgPath) -> bool {
   199         self.inner_matches(filename)
   204         self.inner_matches(filename)
   200     }
   205     }
   201     fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
   206     fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet {
   202         if self.files.is_empty() || !self.dirs.contains(&directory) {
   207         if self.files.is_empty() || !self.dirs.contains(directory) {
   203             return VisitChildrenSet::Empty;
   208             return VisitChildrenSet::Empty;
   204         }
   209         }
   205         let mut candidates: HashSet<HgPathBuf> =
   210 
   206             self.dirs.iter().cloned().collect();
   211         let compute_candidates = || -> BTreeSet<HgPathBuf> {
   207 
   212             let mut candidates: BTreeSet<HgPathBuf> =
   208         candidates.extend(self.files.iter().cloned());
   213                 self.dirs.iter().cloned().collect();
   209         candidates.remove(HgPath::new(b""));
   214             candidates.extend(self.files.iter().cloned());
   210 
   215             candidates.remove(HgPath::new(b""));
   211         if !directory.as_ref().is_empty() {
   216             candidates
   212             let directory = [directory.as_ref().as_bytes(), b"/"].concat();
   217         };
   213             candidates = candidates
   218         let candidates =
   214                 .iter()
   219             if directory.as_ref().is_empty() {
   215                 .filter_map(|c| {
   220                 compute_candidates()
   216                     if c.as_bytes().starts_with(&directory) {
   221             } else {
   217                         Some(HgPathBuf::from_bytes(
   222                 let sorted_candidates = self
   218                             &c.as_bytes()[directory.len()..],
   223                     .sorted_visitchildrenset_candidates
   219                         ))
   224                     .get_or_init(compute_candidates);
   220                     } else {
   225                 let directory_bytes = directory.as_ref().as_bytes();
   221                         None
   226                 let start: HgPathBuf =
   222                     }
   227                     format_bytes!(b"{}/", directory_bytes).into();
   223                 })
   228                 let start_len = start.len();
   224                 .collect();
   229                 // `0` sorts after `/`
   225         }
   230                 let end = format_bytes!(b"{}0", directory_bytes).into();
       
   231                 BTreeSet::from_iter(sorted_candidates.range(start..end).map(
       
   232                     |c| HgPathBuf::from_bytes(&c.as_bytes()[start_len..]),
       
   233                 ))
       
   234             };
   226 
   235 
   227         // `self.dirs` includes all of the directories, recursively, so if
   236         // `self.dirs` includes all of the directories, recursively, so if
   228         // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
   237         // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
   229         // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
   238         // 'foo/bar' in it. Thus we can safely ignore a candidate that has a
   230         // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
   239         // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate