1 use crate::dirstate::status::IgnoreFnType; |
1 use crate::dirstate::status::IgnoreFnType; |
2 use crate::dirstate_tree::dirstate_map::BorrowedPath; |
2 use crate::dirstate_tree::dirstate_map::BorrowedPath; |
3 use crate::dirstate_tree::dirstate_map::ChildNodesRef; |
3 use crate::dirstate_tree::dirstate_map::ChildNodesRef; |
4 use crate::dirstate_tree::dirstate_map::DirstateMap; |
4 use crate::dirstate_tree::dirstate_map::DirstateMap; |
|
5 use crate::dirstate_tree::dirstate_map::NodeData; |
5 use crate::dirstate_tree::dirstate_map::NodeRef; |
6 use crate::dirstate_tree::dirstate_map::NodeRef; |
6 use crate::dirstate_tree::on_disk::DirstateV2ParseError; |
7 use crate::dirstate_tree::on_disk::DirstateV2ParseError; |
|
8 use crate::dirstate_tree::on_disk::Timestamp; |
|
9 use crate::dirstate_tree::path_with_basename::WithBasename; |
7 use crate::matchers::get_ignore_function; |
10 use crate::matchers::get_ignore_function; |
8 use crate::matchers::Matcher; |
11 use crate::matchers::Matcher; |
9 use crate::utils::files::get_bytes_from_os_string; |
12 use crate::utils::files::get_bytes_from_os_string; |
10 use crate::utils::files::get_path_from_bytes; |
13 use crate::utils::files::get_path_from_bytes; |
11 use crate::utils::hg_path::HgPath; |
14 use crate::utils::hg_path::HgPath; |
16 use crate::PatternFileWarning; |
19 use crate::PatternFileWarning; |
17 use crate::StatusError; |
20 use crate::StatusError; |
18 use crate::StatusOptions; |
21 use crate::StatusOptions; |
19 use micro_timer::timed; |
22 use micro_timer::timed; |
20 use rayon::prelude::*; |
23 use rayon::prelude::*; |
|
24 use std::borrow::Cow; |
21 use std::io; |
25 use std::io; |
22 use std::path::Path; |
26 use std::path::Path; |
23 use std::path::PathBuf; |
27 use std::path::PathBuf; |
24 use std::sync::Mutex; |
28 use std::sync::Mutex; |
|
29 use std::time::SystemTime; |
25 |
30 |
26 /// Returns the status of the working directory compared to its parent |
31 /// Returns the status of the working directory compared to its parent |
27 /// changeset. |
32 /// changeset. |
28 /// |
33 /// |
29 /// This algorithm is based on traversing the filesystem tree (`fs` in function |
34 /// This algorithm is based on traversing the filesystem tree (`fs` in function |
50 let common = StatusCommon { |
55 let common = StatusCommon { |
51 dmap, |
56 dmap, |
52 options, |
57 options, |
53 matcher, |
58 matcher, |
54 ignore_fn, |
59 ignore_fn, |
55 outcome: Mutex::new(DirstateStatus::default()), |
60 outcome: Default::default(), |
|
61 cached_directory_mtimes_to_add: Default::default(), |
|
62 filesystem_time_at_status_start: filesystem_now(&root_dir).ok(), |
56 }; |
63 }; |
57 let is_at_repo_root = true; |
64 let is_at_repo_root = true; |
58 let hg_path = &BorrowedPath::OnDisk(HgPath::new("")); |
65 let hg_path = &BorrowedPath::OnDisk(HgPath::new("")); |
59 let has_ignored_ancestor = false; |
66 let has_ignored_ancestor = false; |
|
67 let root_cached_mtime = None; |
|
68 let root_dir_metadata = None; |
|
69 // If the path we have for the repository root is a symlink, do follow it. |
|
70 // (As opposed to symlinks within the working directory which are not |
|
71 // followed, using `std::fs::symlink_metadata`.) |
60 common.traverse_fs_directory_and_dirstate( |
72 common.traverse_fs_directory_and_dirstate( |
61 has_ignored_ancestor, |
73 has_ignored_ancestor, |
62 dmap.root.as_ref(), |
74 dmap.root.as_ref(), |
63 hg_path, |
75 hg_path, |
64 &root_dir, |
76 &root_dir, |
|
77 root_dir_metadata, |
|
78 root_cached_mtime, |
65 is_at_repo_root, |
79 is_at_repo_root, |
66 )?; |
80 )?; |
67 Ok((common.outcome.into_inner().unwrap(), warnings)) |
81 let outcome = common.outcome.into_inner().unwrap(); |
|
82 let to_add = common.cached_directory_mtimes_to_add.into_inner().unwrap(); |
|
83 for (path, mtime) in &to_add { |
|
84 let node = DirstateMap::get_or_insert_node( |
|
85 dmap.on_disk, |
|
86 &mut dmap.root, |
|
87 path, |
|
88 WithBasename::to_cow_owned, |
|
89 |_| {}, |
|
90 )?; |
|
91 match &node.data { |
|
92 NodeData::Entry(_) => {} // Don’t overwrite an entry |
|
93 NodeData::CachedDirectory { .. } | NodeData::None => { |
|
94 node.data = NodeData::CachedDirectory { mtime: *mtime } |
|
95 } |
|
96 } |
|
97 } |
|
98 Ok((outcome, warnings)) |
68 } |
99 } |
69 |
100 |
70 /// Bag of random things needed by various parts of the algorithm. Reduces the |
101 /// Bag of random things needed by various parts of the algorithm. Reduces the |
71 /// number of parameters passed to functions. |
102 /// number of parameters passed to functions. |
72 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> { |
103 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> { |
73 dmap: &'tree DirstateMap<'on_disk>, |
104 dmap: &'tree DirstateMap<'on_disk>, |
74 options: StatusOptions, |
105 options: StatusOptions, |
75 matcher: &'a (dyn Matcher + Sync), |
106 matcher: &'a (dyn Matcher + Sync), |
76 ignore_fn: IgnoreFnType<'a>, |
107 ignore_fn: IgnoreFnType<'a>, |
77 outcome: Mutex<DirstateStatus<'on_disk>>, |
108 outcome: Mutex<DirstateStatus<'on_disk>>, |
|
109 cached_directory_mtimes_to_add: |
|
110 Mutex<Vec<(Cow<'on_disk, HgPath>, Timestamp)>>, |
|
111 |
|
112 /// The current time at the start of the `status()` algorithm, as measured |
|
113 /// and possibly truncated by the filesystem. |
|
114 filesystem_time_at_status_start: Option<SystemTime>, |
78 } |
115 } |
79 |
116 |
80 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> { |
117 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> { |
81 fn read_dir( |
118 fn read_dir( |
82 &self, |
119 &self, |
95 .unwrap() |
132 .unwrap() |
96 .bad |
133 .bad |
97 .push((hg_path.to_owned().into(), BadMatch::OsError(errno))) |
134 .push((hg_path.to_owned().into(), BadMatch::OsError(errno))) |
98 } |
135 } |
99 |
136 |
|
137 /// If this returns true, we can get accurate results by only using |
|
138 /// `symlink_metadata` for child nodes that exist in the dirstate and don’t |
|
139 /// need to call `read_dir`. |
|
140 fn can_skip_fs_readdir( |
|
141 &self, |
|
142 directory_metadata: Option<&std::fs::Metadata>, |
|
143 cached_directory_mtime: Option<&Timestamp>, |
|
144 ) -> bool { |
|
145 if !self.options.list_unknown && !self.options.list_ignored { |
|
146 // All states that we care about listing have corresponding |
|
147 // dirstate entries. |
|
148 // This happens for example with `hg status -mard`. |
|
149 return true; |
|
150 } |
|
151 if let Some(cached_mtime) = cached_directory_mtime { |
|
152 // The dirstate contains a cached mtime for this directory, set by |
|
153 // a previous run of the `status` algorithm which found this |
|
154 // directory eligible for `read_dir` caching. |
|
155 if let Some(meta) = directory_metadata { |
|
156 if let Ok(current_mtime) = meta.modified() { |
|
157 if current_mtime == cached_mtime.into() { |
|
158 // The mtime of that directory has not changed since |
|
159 // then, which means that the |
|
160 // results of `read_dir` should also |
|
161 // be unchanged. |
|
162 return true; |
|
163 } |
|
164 } |
|
165 } |
|
166 } |
|
167 false |
|
168 } |
|
169 |
|
170 /// Returns whether the filesystem directory was found to have any entry |
|
171 /// that does not have a corresponding dirstate tree node. |
100 fn traverse_fs_directory_and_dirstate( |
172 fn traverse_fs_directory_and_dirstate( |
101 &self, |
173 &self, |
102 has_ignored_ancestor: bool, |
174 has_ignored_ancestor: bool, |
103 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>, |
175 dirstate_nodes: ChildNodesRef<'tree, 'on_disk>, |
104 directory_hg_path: &BorrowedPath<'tree, 'on_disk>, |
176 directory_hg_path: &BorrowedPath<'tree, 'on_disk>, |
105 directory_fs_path: &Path, |
177 directory_fs_path: &Path, |
|
178 directory_metadata: Option<&std::fs::Metadata>, |
|
179 cached_directory_mtime: Option<&Timestamp>, |
106 is_at_repo_root: bool, |
180 is_at_repo_root: bool, |
107 ) -> Result<(), DirstateV2ParseError> { |
181 ) -> Result<bool, DirstateV2ParseError> { |
108 if !self.options.list_unknown && !self.options.list_ignored { |
182 if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime) |
109 // We only care about files in the dirstate, so we can skip listing |
183 { |
110 // filesystem directories entirely. |
184 dirstate_nodes |
111 return dirstate_nodes |
|
112 .par_iter() |
185 .par_iter() |
113 .map(|dirstate_node| { |
186 .map(|dirstate_node| { |
114 let fs_path = directory_fs_path.join(get_path_from_bytes( |
187 let fs_path = directory_fs_path.join(get_path_from_bytes( |
115 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(), |
188 dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(), |
116 )); |
189 )); |
172 }, |
251 }, |
173 ) |
252 ) |
174 .par_bridge() |
253 .par_bridge() |
175 .map(|pair| { |
254 .map(|pair| { |
176 use itertools::EitherOrBoth::*; |
255 use itertools::EitherOrBoth::*; |
|
256 let is_fs_only = pair.is_right(); |
177 match pair { |
257 match pair { |
178 Both(dirstate_node, fs_entry) => self |
258 Both(dirstate_node, fs_entry) => self |
179 .traverse_fs_and_dirstate( |
259 .traverse_fs_and_dirstate( |
180 &fs_entry.full_path, |
260 &fs_entry.full_path, |
181 &fs_entry.metadata, |
261 &fs_entry.metadata, |
182 dirstate_node, |
262 dirstate_node, |
183 has_ignored_ancestor, |
263 has_ignored_ancestor, |
184 ), |
264 )?, |
185 Left(dirstate_node) => { |
265 Left(dirstate_node) => { |
186 self.traverse_dirstate_only(dirstate_node) |
266 self.traverse_dirstate_only(dirstate_node)? |
187 } |
267 } |
188 Right(fs_entry) => Ok(self.traverse_fs_only( |
268 Right(fs_entry) => self.traverse_fs_only( |
189 has_ignored_ancestor, |
269 has_ignored_ancestor, |
190 directory_hg_path, |
270 directory_hg_path, |
191 fs_entry, |
271 fs_entry, |
192 )), |
272 ), |
193 } |
273 } |
|
274 Ok(is_fs_only) |
194 }) |
275 }) |
195 .collect() |
276 .try_reduce(|| false, |a, b| Ok(a || b)) |
196 } |
277 } |
197 |
278 |
198 fn traverse_fs_and_dirstate( |
279 fn traverse_fs_and_dirstate( |
199 &self, |
280 &self, |
200 fs_path: &Path, |
281 fs_path: &Path, |
222 .traversed |
303 .traversed |
223 .push(hg_path.detach_from_tree()) |
304 .push(hg_path.detach_from_tree()) |
224 } |
305 } |
225 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path); |
306 let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path); |
226 let is_at_repo_root = false; |
307 let is_at_repo_root = false; |
227 self.traverse_fs_directory_and_dirstate( |
308 let directory_has_any_fs_only_entry = self |
228 is_ignored, |
309 .traverse_fs_directory_and_dirstate( |
229 dirstate_node.children(self.dmap.on_disk)?, |
310 is_ignored, |
230 hg_path, |
311 dirstate_node.children(self.dmap.on_disk)?, |
231 fs_path, |
312 hg_path, |
232 is_at_repo_root, |
313 fs_path, |
|
314 Some(fs_metadata), |
|
315 dirstate_node.cached_directory_mtime(), |
|
316 is_at_repo_root, |
|
317 )?; |
|
318 self.maybe_save_directory_mtime( |
|
319 directory_has_any_fs_only_entry, |
|
320 fs_metadata, |
|
321 dirstate_node, |
233 )? |
322 )? |
234 } else { |
323 } else { |
235 if file_or_symlink && self.matcher.matches(hg_path) { |
324 if file_or_symlink && self.matcher.matches(hg_path) { |
236 if let Some(state) = dirstate_node.state()? { |
325 if let Some(state) = dirstate_node.state()? { |
237 match state { |
326 match state { |
267 } |
356 } |
268 |
357 |
269 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter() |
358 for child_node in dirstate_node.children(self.dmap.on_disk)?.iter() |
270 { |
359 { |
271 self.traverse_dirstate_only(child_node)? |
360 self.traverse_dirstate_only(child_node)? |
|
361 } |
|
362 } |
|
363 Ok(()) |
|
364 } |
|
365 |
|
366 fn maybe_save_directory_mtime( |
|
367 &self, |
|
368 directory_has_any_fs_only_entry: bool, |
|
369 directory_metadata: &std::fs::Metadata, |
|
370 dirstate_node: NodeRef<'tree, 'on_disk>, |
|
371 ) -> Result<(), DirstateV2ParseError> { |
|
372 if !directory_has_any_fs_only_entry { |
|
373 // All filesystem directory entries from `read_dir` have a |
|
374 // corresponding node in the dirstate, so we can reconstitute the |
|
375 // names of those entries without calling `read_dir` again. |
|
376 if let (Some(status_start), Ok(directory_mtime)) = ( |
|
377 &self.filesystem_time_at_status_start, |
|
378 directory_metadata.modified(), |
|
379 ) { |
|
380 // Although the Rust standard library’s `SystemTime` type |
|
381 // has nanosecond precision, the times reported for a |
|
382 // directory’s (or file’s) modified time may have lower |
|
383 // resolution based on the filesystem (for example ext3 |
|
384 // only stores integer seconds), kernel (see |
|
385 // https://stackoverflow.com/a/14393315/1162888), etc. |
|
386 if &directory_mtime >= status_start { |
|
387 // The directory was modified too recently, don’t cache its |
|
388 // `read_dir` results. |
|
389 // |
|
390 // A timeline like this is possible: |
|
391 // |
|
392 // 1. A change to this directory (direct child was |
|
393 // added or removed) cause its mtime to be set |
|
394 // (possibly truncated) to `directory_mtime` |
|
395 // 2. This `status` algorithm calls `read_dir` |
|
396 // 3. An other change is made to the same directory is |
|
397 // made so that calling `read_dir` agin would give |
|
398 // different results, but soon enough after 1. that |
|
399 // the mtime stays the same |
|
400 // |
|
401 // On a system where the time resolution poor, this |
|
402 // scenario is not unlikely if all three steps are caused |
|
403 // by the same script. |
|
404 } else { |
|
405 // We’ve observed (through `status_start`) that time has |
|
406 // “progressed” since `directory_mtime`, so any further |
|
407 // change to this directory is extremely likely to cause a |
|
408 // different mtime. |
|
409 // |
|
410 // Having the same mtime again is not entirely impossible |
|
411 // since the system clock is not monotonous. It could jump |
|
412 // backward to some point before `directory_mtime`, then a |
|
413 // directory change could potentially happen during exactly |
|
414 // the wrong tick. |
|
415 // |
|
416 // We deem this scenario (unlike the previous one) to be |
|
417 // unlikely enough in practice. |
|
418 let timestamp = directory_mtime.into(); |
|
419 let cached = dirstate_node.cached_directory_mtime(); |
|
420 if cached != Some(×tamp) { |
|
421 let hg_path = dirstate_node |
|
422 .full_path_borrowed(self.dmap.on_disk)? |
|
423 .detach_from_tree(); |
|
424 self.cached_directory_mtimes_to_add |
|
425 .lock() |
|
426 .unwrap() |
|
427 .push((hg_path, timestamp)) |
|
428 } |
|
429 } |
272 } |
430 } |
273 } |
431 } |
274 Ok(()) |
432 Ok(()) |
275 } |
433 } |
276 |
434 |
503 }) |
661 }) |
504 } |
662 } |
505 Ok(results) |
663 Ok(results) |
506 } |
664 } |
507 } |
665 } |
|
666 |
|
667 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory |
|
668 /// of the give repository. |
|
669 /// |
|
670 /// This is similar to `SystemTime::now()`, with the result truncated to the |
|
671 /// same time resolution as other files’ modification times. Using `.hg` |
|
672 /// instead of the system’s default temporary directory (such as `/tmp`) makes |
|
673 /// it more likely the temporary file is in the same disk partition as contents |
|
674 /// of the working directory, which can matter since different filesystems may |
|
675 /// store timestamps with different resolutions. |
|
676 /// |
|
677 /// This may fail, typically if we lack write permissions. In that case we |
|
678 /// should continue the `status()` algoritm anyway and consider the current |
|
679 /// date/time to be unknown. |
|
680 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> { |
|
681 tempfile::tempfile_in(repo_root.join(".hg"))? |
|
682 .metadata()? |
|
683 .modified() |
|
684 } |