rust/hg-core/src/dirstate_tree/status.rs
changeset 47349 7138c863d0a1
parent 47347 73ddcedeaadf
child 47350 04d1f17f49e7
equal deleted inserted replaced
47348:a4de570e61fa 47349:7138c863d0a1
     1 use crate::dirstate::status::IgnoreFnType;
     1 use crate::dirstate::status::IgnoreFnType;
     2 use crate::dirstate_tree::dirstate_map::BorrowedPath;
     2 use crate::dirstate_tree::dirstate_map::BorrowedPath;
     3 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
     3 use crate::dirstate_tree::dirstate_map::ChildNodesRef;
     4 use crate::dirstate_tree::dirstate_map::DirstateMap;
     4 use crate::dirstate_tree::dirstate_map::DirstateMap;
       
     5 use crate::dirstate_tree::dirstate_map::NodeData;
     5 use crate::dirstate_tree::dirstate_map::NodeRef;
     6 use crate::dirstate_tree::dirstate_map::NodeRef;
     6 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
     7 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
       
     8 use crate::dirstate_tree::on_disk::Timestamp;
       
     9 use crate::dirstate_tree::path_with_basename::WithBasename;
     7 use crate::matchers::get_ignore_function;
    10 use crate::matchers::get_ignore_function;
     8 use crate::matchers::Matcher;
    11 use crate::matchers::Matcher;
     9 use crate::utils::files::get_bytes_from_os_string;
    12 use crate::utils::files::get_bytes_from_os_string;
    10 use crate::utils::files::get_path_from_bytes;
    13 use crate::utils::files::get_path_from_bytes;
    11 use crate::utils::hg_path::HgPath;
    14 use crate::utils::hg_path::HgPath;
    16 use crate::PatternFileWarning;
    19 use crate::PatternFileWarning;
    17 use crate::StatusError;
    20 use crate::StatusError;
    18 use crate::StatusOptions;
    21 use crate::StatusOptions;
    19 use micro_timer::timed;
    22 use micro_timer::timed;
    20 use rayon::prelude::*;
    23 use rayon::prelude::*;
       
    24 use std::borrow::Cow;
    21 use std::io;
    25 use std::io;
    22 use std::path::Path;
    26 use std::path::Path;
    23 use std::path::PathBuf;
    27 use std::path::PathBuf;
    24 use std::sync::Mutex;
    28 use std::sync::Mutex;
       
    29 use std::time::SystemTime;
    25 
    30 
    26 /// Returns the status of the working directory compared to its parent
    31 /// Returns the status of the working directory compared to its parent
    27 /// changeset.
    32 /// changeset.
    28 ///
    33 ///
    29 /// This algorithm is based on traversing the filesystem tree (`fs` in function
    34 /// This algorithm is based on traversing the filesystem tree (`fs` in function
    50     let common = StatusCommon {
    55     let common = StatusCommon {
    51         dmap,
    56         dmap,
    52         options,
    57         options,
    53         matcher,
    58         matcher,
    54         ignore_fn,
    59         ignore_fn,
    55         outcome: Mutex::new(DirstateStatus::default()),
    60         outcome: Default::default(),
       
    61         cached_directory_mtimes_to_add: Default::default(),
       
    62         filesystem_time_at_status_start: filesystem_now(&root_dir).ok(),
    56     };
    63     };
    57     let is_at_repo_root = true;
    64     let is_at_repo_root = true;
    58     let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
    65     let hg_path = &BorrowedPath::OnDisk(HgPath::new(""));
    59     let has_ignored_ancestor = false;
    66     let has_ignored_ancestor = false;
       
    67     let root_cached_mtime = None;
       
    68     let root_dir_metadata = None;
       
    69     // If the path we have for the repository root is a symlink, do follow it.
       
    70     // (As opposed to symlinks within the working directory which are not
       
    71     // followed, using `std::fs::symlink_metadata`.)
    60     common.traverse_fs_directory_and_dirstate(
    72     common.traverse_fs_directory_and_dirstate(
    61         has_ignored_ancestor,
    73         has_ignored_ancestor,
    62         dmap.root.as_ref(),
    74         dmap.root.as_ref(),
    63         hg_path,
    75         hg_path,
    64         &root_dir,
    76         &root_dir,
       
    77         root_dir_metadata,
       
    78         root_cached_mtime,
    65         is_at_repo_root,
    79         is_at_repo_root,
    66     )?;
    80     )?;
    67     Ok((common.outcome.into_inner().unwrap(), warnings))
    81     let outcome = common.outcome.into_inner().unwrap();
       
    82     let to_add = common.cached_directory_mtimes_to_add.into_inner().unwrap();
       
    83     for (path, mtime) in &to_add {
       
    84         let node = DirstateMap::get_or_insert_node(
       
    85             dmap.on_disk,
       
    86             &mut dmap.root,
       
    87             path,
       
    88             WithBasename::to_cow_owned,
       
    89             |_| {},
       
    90         )?;
       
    91         match &node.data {
       
    92             NodeData::Entry(_) => {} // Don’t overwrite an entry
       
    93             NodeData::CachedDirectory { .. } | NodeData::None => {
       
    94                 node.data = NodeData::CachedDirectory { mtime: *mtime }
       
    95             }
       
    96         }
       
    97     }
       
    98     Ok((outcome, warnings))
    68 }
    99 }
    69 
   100 
    70 /// Bag of random things needed by various parts of the algorithm. Reduces the
   101 /// Bag of random things needed by various parts of the algorithm. Reduces the
    71 /// number of parameters passed to functions.
   102 /// number of parameters passed to functions.
    72 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
   103 struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
    73     dmap: &'tree DirstateMap<'on_disk>,
   104     dmap: &'tree DirstateMap<'on_disk>,
    74     options: StatusOptions,
   105     options: StatusOptions,
    75     matcher: &'a (dyn Matcher + Sync),
   106     matcher: &'a (dyn Matcher + Sync),
    76     ignore_fn: IgnoreFnType<'a>,
   107     ignore_fn: IgnoreFnType<'a>,
    77     outcome: Mutex<DirstateStatus<'on_disk>>,
   108     outcome: Mutex<DirstateStatus<'on_disk>>,
       
   109     cached_directory_mtimes_to_add:
       
   110         Mutex<Vec<(Cow<'on_disk, HgPath>, Timestamp)>>,
       
   111 
       
   112     /// The current time at the start of the `status()` algorithm, as measured
       
   113     /// and possibly truncated by the filesystem.
       
   114     filesystem_time_at_status_start: Option<SystemTime>,
    78 }
   115 }
    79 
   116 
    80 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
   117 impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> {
    81     fn read_dir(
   118     fn read_dir(
    82         &self,
   119         &self,
    95             .unwrap()
   132             .unwrap()
    96             .bad
   133             .bad
    97             .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
   134             .push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
    98     }
   135     }
    99 
   136 
       
   137     /// If this returns true, we can get accurate results by only using
       
   138     /// `symlink_metadata` for child nodes that exist in the dirstate and don’t
       
   139     /// need to call `read_dir`.
       
   140     fn can_skip_fs_readdir(
       
   141         &self,
       
   142         directory_metadata: Option<&std::fs::Metadata>,
       
   143         cached_directory_mtime: Option<&Timestamp>,
       
   144     ) -> bool {
       
   145         if !self.options.list_unknown && !self.options.list_ignored {
       
   146             // All states that we care about listing have corresponding
       
   147             // dirstate entries.
       
   148             // This happens for example with `hg status -mard`.
       
   149             return true;
       
   150         }
       
   151         if let Some(cached_mtime) = cached_directory_mtime {
       
   152             // The dirstate contains a cached mtime for this directory, set by
       
   153             // a previous run of the `status` algorithm which found this
       
   154             // directory eligible for `read_dir` caching.
       
   155             if let Some(meta) = directory_metadata {
       
   156                 if let Ok(current_mtime) = meta.modified() {
       
   157                     if current_mtime == cached_mtime.into() {
       
   158                         // The mtime of that directory has not changed since
       
   159                         // then, which means that the
       
   160                         // results of `read_dir` should also
       
   161                         // be unchanged.
       
   162                         return true;
       
   163                     }
       
   164                 }
       
   165             }
       
   166         }
       
   167         false
       
   168     }
       
   169 
       
   170     /// Returns whether the filesystem directory was found to have any entry
       
   171     /// that does not have a corresponding dirstate tree node.
   100     fn traverse_fs_directory_and_dirstate(
   172     fn traverse_fs_directory_and_dirstate(
   101         &self,
   173         &self,
   102         has_ignored_ancestor: bool,
   174         has_ignored_ancestor: bool,
   103         dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
   175         dirstate_nodes: ChildNodesRef<'tree, 'on_disk>,
   104         directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
   176         directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
   105         directory_fs_path: &Path,
   177         directory_fs_path: &Path,
       
   178         directory_metadata: Option<&std::fs::Metadata>,
       
   179         cached_directory_mtime: Option<&Timestamp>,
   106         is_at_repo_root: bool,
   180         is_at_repo_root: bool,
   107     ) -> Result<(), DirstateV2ParseError> {
   181     ) -> Result<bool, DirstateV2ParseError> {
   108         if !self.options.list_unknown && !self.options.list_ignored {
   182         if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime)
   109             // We only care about files in the dirstate, so we can skip listing
   183         {
   110             // filesystem directories entirely.
   184             dirstate_nodes
   111             return dirstate_nodes
       
   112                 .par_iter()
   185                 .par_iter()
   113                 .map(|dirstate_node| {
   186                 .map(|dirstate_node| {
   114                     let fs_path = directory_fs_path.join(get_path_from_bytes(
   187                     let fs_path = directory_fs_path.join(get_path_from_bytes(
   115                         dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
   188                         dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
   116                     ));
   189                     ));
   129                                 dirstate_node.full_path(self.dmap.on_disk)?;
   202                                 dirstate_node.full_path(self.dmap.on_disk)?;
   130                             Ok(self.io_error(error, hg_path))
   203                             Ok(self.io_error(error, hg_path))
   131                         }
   204                         }
   132                     }
   205                     }
   133                 })
   206                 })
   134                 .collect();
   207                 .collect::<Result<_, _>>()?;
       
   208 
       
   209             // Conservatively don’t let the caller assume that there aren’t
       
   210             // any, since we don’t know.
       
   211             let directory_has_any_fs_only_entry = true;
       
   212 
       
   213             return Ok(directory_has_any_fs_only_entry);
   135         }
   214         }
   136 
   215 
   137         let mut fs_entries = if let Ok(entries) = self.read_dir(
   216         let mut fs_entries = if let Ok(entries) = self.read_dir(
   138             directory_hg_path,
   217             directory_hg_path,
   139             directory_fs_path,
   218             directory_fs_path,
   172             },
   251             },
   173         )
   252         )
   174         .par_bridge()
   253         .par_bridge()
   175         .map(|pair| {
   254         .map(|pair| {
   176             use itertools::EitherOrBoth::*;
   255             use itertools::EitherOrBoth::*;
       
   256             let is_fs_only = pair.is_right();
   177             match pair {
   257             match pair {
   178                 Both(dirstate_node, fs_entry) => self
   258                 Both(dirstate_node, fs_entry) => self
   179                     .traverse_fs_and_dirstate(
   259                     .traverse_fs_and_dirstate(
   180                         &fs_entry.full_path,
   260                         &fs_entry.full_path,
   181                         &fs_entry.metadata,
   261                         &fs_entry.metadata,
   182                         dirstate_node,
   262                         dirstate_node,
   183                         has_ignored_ancestor,
   263                         has_ignored_ancestor,
   184                     ),
   264                     )?,
   185                 Left(dirstate_node) => {
   265                 Left(dirstate_node) => {
   186                     self.traverse_dirstate_only(dirstate_node)
   266                     self.traverse_dirstate_only(dirstate_node)?
   187                 }
   267                 }
   188                 Right(fs_entry) => Ok(self.traverse_fs_only(
   268                 Right(fs_entry) => self.traverse_fs_only(
   189                     has_ignored_ancestor,
   269                     has_ignored_ancestor,
   190                     directory_hg_path,
   270                     directory_hg_path,
   191                     fs_entry,
   271                     fs_entry,
   192                 )),
   272                 ),
   193             }
   273             }
       
   274             Ok(is_fs_only)
   194         })
   275         })
   195         .collect()
   276         .try_reduce(|| false, |a, b| Ok(a || b))
   196     }
   277     }
   197 
   278 
   198     fn traverse_fs_and_dirstate(
   279     fn traverse_fs_and_dirstate(
   199         &self,
   280         &self,
   200         fs_path: &Path,
   281         fs_path: &Path,
   222                     .traversed
   303                     .traversed
   223                     .push(hg_path.detach_from_tree())
   304                     .push(hg_path.detach_from_tree())
   224             }
   305             }
   225             let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
   306             let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path);
   226             let is_at_repo_root = false;
   307             let is_at_repo_root = false;
   227             self.traverse_fs_directory_and_dirstate(
   308             let directory_has_any_fs_only_entry = self
   228                 is_ignored,
   309                 .traverse_fs_directory_and_dirstate(
   229                 dirstate_node.children(self.dmap.on_disk)?,
   310                     is_ignored,
   230                 hg_path,
   311                     dirstate_node.children(self.dmap.on_disk)?,
   231                 fs_path,
   312                     hg_path,
   232                 is_at_repo_root,
   313                     fs_path,
       
   314                     Some(fs_metadata),
       
   315                     dirstate_node.cached_directory_mtime(),
       
   316                     is_at_repo_root,
       
   317                 )?;
       
   318             self.maybe_save_directory_mtime(
       
   319                 directory_has_any_fs_only_entry,
       
   320                 fs_metadata,
       
   321                 dirstate_node,
   233             )?
   322             )?
   234         } else {
   323         } else {
   235             if file_or_symlink && self.matcher.matches(hg_path) {
   324             if file_or_symlink && self.matcher.matches(hg_path) {
   236                 if let Some(state) = dirstate_node.state()? {
   325                 if let Some(state) = dirstate_node.state()? {
   237                     match state {
   326                     match state {
   267             }
   356             }
   268 
   357 
   269             for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
   358             for child_node in dirstate_node.children(self.dmap.on_disk)?.iter()
   270             {
   359             {
   271                 self.traverse_dirstate_only(child_node)?
   360                 self.traverse_dirstate_only(child_node)?
       
   361             }
       
   362         }
       
   363         Ok(())
       
   364     }
       
   365 
       
   366     fn maybe_save_directory_mtime(
       
   367         &self,
       
   368         directory_has_any_fs_only_entry: bool,
       
   369         directory_metadata: &std::fs::Metadata,
       
   370         dirstate_node: NodeRef<'tree, 'on_disk>,
       
   371     ) -> Result<(), DirstateV2ParseError> {
       
   372         if !directory_has_any_fs_only_entry {
       
   373             // All filesystem directory entries from `read_dir` have a
       
   374             // corresponding node in the dirstate, so we can reconstitute the
       
   375             // names of those entries without calling `read_dir` again.
       
   376             if let (Some(status_start), Ok(directory_mtime)) = (
       
   377                 &self.filesystem_time_at_status_start,
       
   378                 directory_metadata.modified(),
       
   379             ) {
       
   380                 // Although the Rust standard library’s `SystemTime` type
       
   381                 // has nanosecond precision, the times reported for a
       
   382                 // directory’s (or file’s) modified time may have lower
       
   383                 // resolution based on the filesystem (for example ext3
       
   384                 // only stores integer seconds), kernel (see
       
   385                 // https://stackoverflow.com/a/14393315/1162888), etc.
       
   386                 if &directory_mtime >= status_start {
       
   387                     // The directory was modified too recently, don’t cache its
       
   388                     // `read_dir` results.
       
   389                     //
       
   390                     // A timeline like this is possible:
       
   391                     //
       
   392                     // 1. A change to this directory (direct child was
       
   393                     //    added or removed) cause its mtime to be set
       
   394                     //    (possibly truncated) to `directory_mtime`
       
   395                     // 2. This `status` algorithm calls `read_dir`
       
   396                     // 3. An other change is made to the same directory is
       
   397                     //    made so that calling `read_dir` agin would give
       
   398                     //    different results, but soon enough after 1. that
       
   399                     //    the mtime stays the same
       
   400                     //
       
   401                     // On a system where the time resolution poor, this
       
   402                     // scenario is not unlikely if all three steps are caused
       
   403                     // by the same script.
       
   404                 } else {
       
   405                     // We’ve observed (through `status_start`) that time has
       
   406                     // “progressed” since `directory_mtime`, so any further
       
   407                     // change to this directory is extremely likely to cause a
       
   408                     // different mtime.
       
   409                     //
       
   410                     // Having the same mtime again is not entirely impossible
       
   411                     // since the system clock is not monotonous. It could jump
       
   412                     // backward to some point before `directory_mtime`, then a
       
   413                     // directory change could potentially happen during exactly
       
   414                     // the wrong tick.
       
   415                     //
       
   416                     // We deem this scenario (unlike the previous one) to be
       
   417                     // unlikely enough in practice.
       
   418                     let timestamp = directory_mtime.into();
       
   419                     let cached = dirstate_node.cached_directory_mtime();
       
   420                     if cached != Some(&timestamp) {
       
   421                         let hg_path = dirstate_node
       
   422                             .full_path_borrowed(self.dmap.on_disk)?
       
   423                             .detach_from_tree();
       
   424                         self.cached_directory_mtimes_to_add
       
   425                             .lock()
       
   426                             .unwrap()
       
   427                             .push((hg_path, timestamp))
       
   428                     }
       
   429                 }
   272             }
   430             }
   273         }
   431         }
   274         Ok(())
   432         Ok(())
   275     }
   433     }
   276 
   434 
   503             })
   661             })
   504         }
   662         }
   505         Ok(results)
   663         Ok(results)
   506     }
   664     }
   507 }
   665 }
       
   666 
       
   667 /// Return the `mtime` of a temporary file newly-created in the `.hg` directory
       
   668 /// of the give repository.
       
   669 ///
       
   670 /// This is similar to `SystemTime::now()`, with the result truncated to the
       
   671 /// same time resolution as other files’ modification times. Using `.hg`
       
   672 /// instead of the system’s default temporary directory (such as `/tmp`) makes
       
   673 /// it more likely the temporary file is in the same disk partition as contents
       
   674 /// of the working directory, which can matter since different filesystems may
       
   675 /// store timestamps with different resolutions.
       
   676 ///
       
   677 /// This may fail, typically if we lack write permissions. In that case we
       
   678 /// should continue the `status()` algoritm anyway and consider the current
       
   679 /// date/time to be unknown.
       
   680 fn filesystem_now(repo_root: &Path) -> Result<SystemTime, io::Error> {
       
   681     tempfile::tempfile_in(repo_root.join(".hg"))?
       
   682         .metadata()?
       
   683         .modified()
       
   684 }