4 // |
4 // |
5 // This software may be used and distributed according to the terms of the |
5 // This software may be used and distributed according to the terms of the |
6 // GNU General Public License version 2 or any later version. |
6 // GNU General Public License version 2 or any later version. |
7 |
7 |
8 //! Structs and types for matching files and directories. |
8 //! Structs and types for matching files and directories. |
|
9 |
|
10 use format_bytes::format_bytes; |
|
11 use once_cell::sync::OnceCell; |
9 |
12 |
10 use crate::{ |
13 use crate::{ |
11 dirstate::dirs_multiset::DirsChildrenMultiset, |
14 dirstate::dirs_multiset::DirsChildrenMultiset, |
12 filepatterns::{ |
15 filepatterns::{ |
13 build_single_regex, filter_subincludes, get_patterns_from_file, |
16 build_single_regex, filter_subincludes, get_patterns_from_file, |
21 DirsMultiset, FastHashMap, IgnorePattern, PatternError, PatternSyntax, |
24 DirsMultiset, FastHashMap, IgnorePattern, PatternError, PatternSyntax, |
22 }; |
25 }; |
23 |
26 |
24 use crate::dirstate::status::IgnoreFnType; |
27 use crate::dirstate::status::IgnoreFnType; |
25 use crate::filepatterns::normalize_path_bytes; |
28 use crate::filepatterns::normalize_path_bytes; |
26 use std::borrow::ToOwned; |
|
27 use std::collections::HashSet; |
29 use std::collections::HashSet; |
28 use std::fmt::{Display, Error, Formatter}; |
30 use std::fmt::{Display, Error, Formatter}; |
29 use std::ops::Deref; |
31 use std::ops::Deref; |
30 use std::path::{Path, PathBuf}; |
32 use std::path::{Path, PathBuf}; |
|
33 use std::{borrow::ToOwned, collections::BTreeSet}; |
31 |
34 |
32 #[derive(Debug, PartialEq)] |
35 #[derive(Debug, PartialEq)] |
33 pub enum VisitChildrenSet { |
36 pub enum VisitChildrenSet { |
34 /// Don't visit anything |
37 /// Don't visit anything |
35 Empty, |
38 Empty, |
171 /// ``` |
174 /// ``` |
172 #[derive(Debug)] |
175 #[derive(Debug)] |
173 pub struct FileMatcher { |
176 pub struct FileMatcher { |
174 files: HashSet<HgPathBuf>, |
177 files: HashSet<HgPathBuf>, |
175 dirs: DirsMultiset, |
178 dirs: DirsMultiset, |
|
179 sorted_visitchildrenset_candidates: OnceCell<BTreeSet<HgPathBuf>>, |
176 } |
180 } |
177 |
181 |
178 impl FileMatcher { |
182 impl FileMatcher { |
179 pub fn new(files: Vec<HgPathBuf>) -> Result<Self, HgPathError> { |
183 pub fn new(files: Vec<HgPathBuf>) -> Result<Self, HgPathError> { |
180 let dirs = DirsMultiset::from_manifest(&files)?; |
184 let dirs = DirsMultiset::from_manifest(&files)?; |
181 Ok(Self { |
185 Ok(Self { |
182 files: HashSet::from_iter(files.into_iter()), |
186 files: HashSet::from_iter(files.into_iter()), |
183 dirs, |
187 dirs, |
|
188 sorted_visitchildrenset_candidates: OnceCell::new(), |
184 }) |
189 }) |
185 } |
190 } |
186 fn inner_matches(&self, filename: &HgPath) -> bool { |
191 fn inner_matches(&self, filename: &HgPath) -> bool { |
187 self.files.contains(filename.as_ref()) |
192 self.files.contains(filename.as_ref()) |
188 } |
193 } |
197 } |
202 } |
198 fn matches(&self, filename: &HgPath) -> bool { |
203 fn matches(&self, filename: &HgPath) -> bool { |
199 self.inner_matches(filename) |
204 self.inner_matches(filename) |
200 } |
205 } |
201 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet { |
206 fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet { |
202 if self.files.is_empty() || !self.dirs.contains(&directory) { |
207 if self.files.is_empty() || !self.dirs.contains(directory) { |
203 return VisitChildrenSet::Empty; |
208 return VisitChildrenSet::Empty; |
204 } |
209 } |
205 let mut candidates: HashSet<HgPathBuf> = |
210 |
206 self.dirs.iter().cloned().collect(); |
211 let compute_candidates = || -> BTreeSet<HgPathBuf> { |
207 |
212 let mut candidates: BTreeSet<HgPathBuf> = |
208 candidates.extend(self.files.iter().cloned()); |
213 self.dirs.iter().cloned().collect(); |
209 candidates.remove(HgPath::new(b"")); |
214 candidates.extend(self.files.iter().cloned()); |
210 |
215 candidates.remove(HgPath::new(b"")); |
211 if !directory.as_ref().is_empty() { |
216 candidates |
212 let directory = [directory.as_ref().as_bytes(), b"/"].concat(); |
217 }; |
213 candidates = candidates |
218 let candidates = |
214 .iter() |
219 if directory.as_ref().is_empty() { |
215 .filter_map(|c| { |
220 compute_candidates() |
216 if c.as_bytes().starts_with(&directory) { |
221 } else { |
217 Some(HgPathBuf::from_bytes( |
222 let sorted_candidates = self |
218 &c.as_bytes()[directory.len()..], |
223 .sorted_visitchildrenset_candidates |
219 )) |
224 .get_or_init(compute_candidates); |
220 } else { |
225 let directory_bytes = directory.as_ref().as_bytes(); |
221 None |
226 let start: HgPathBuf = |
222 } |
227 format_bytes!(b"{}/", directory_bytes).into(); |
223 }) |
228 let start_len = start.len(); |
224 .collect(); |
229 // `0` sorts after `/` |
225 } |
230 let end = format_bytes!(b"{}0", directory_bytes).into(); |
|
231 BTreeSet::from_iter(sorted_candidates.range(start..end).map( |
|
232 |c| HgPathBuf::from_bytes(&c.as_bytes()[start_len..]), |
|
233 )) |
|
234 }; |
226 |
235 |
227 // `self.dirs` includes all of the directories, recursively, so if |
236 // `self.dirs` includes all of the directories, recursively, so if |
228 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo', |
237 // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo', |
229 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a |
238 // 'foo/bar' in it. Thus we can safely ignore a candidate that has a |
230 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate |
239 // '/' in it, indicating it's for a subdir-of-a-subdir; the immediate |