rhg: add sparse support
author Raphaël Gomès <rgomes@octobus.net>
Tue, 19 Jul 2022 15:37:45 +0200
changeset 49485 ffd4b1f1c9cb
parent 49484 85f5d11c77dd
child 49486 e8481625c582
rhg: add sparse support
rust/hg-core/src/lib.rs
rust/hg-core/src/sparse.rs
rust/hg-core/src/vfs.rs
rust/rhg/src/commands/status.rs
rust/rhg/src/error.rs
tests/test-rhg-sparse-narrow.t
--- a/rust/hg-core/src/lib.rs	Tue Jul 19 15:37:09 2022 +0200
+++ b/rust/hg-core/src/lib.rs	Tue Jul 19 15:37:45 2022 +0200
@@ -7,6 +7,7 @@
 mod ancestors;
 pub mod dagops;
 pub mod errors;
+pub mod sparse;
 pub use ancestors::{AncestorsIterator, MissingAncestors};
 pub mod dirstate;
 pub mod dirstate_tree;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-core/src/sparse.rs	Tue Jul 19 15:37:45 2022 +0200
@@ -0,0 +1,333 @@
+use std::{collections::HashSet, path::Path};
+
+use format_bytes::{write_bytes, DisplayBytes};
+
+use crate::{
+    errors::HgError,
+    filepatterns::parse_pattern_file_contents,
+    matchers::{
+        AlwaysMatcher, DifferenceMatcher, IncludeMatcher, Matcher,
+        UnionMatcher,
+    },
+    operations::cat,
+    repo::Repo,
+    requirements::SPARSE_REQUIREMENT,
+    utils::{hg_path::HgPath, SliceExt},
+    IgnorePattern, PatternError, PatternFileWarning, PatternSyntax, Revision,
+    NULL_REVISION,
+};
+
+/// Command which is triggering the config read
+///
+/// The value is carried by warnings and errors so that messages can name
+/// the kind of config being read; see the `DisplayBytes` implementation
+/// for the exact text of each variant.
+#[derive(Copy, Clone, Debug)]
+pub enum SparseConfigContext {
+    /// Displayed as `sparse`.
+    Sparse,
+    /// Displayed as `narrow`.
+    Narrow,
+}
+
+impl DisplayBytes for SparseConfigContext {
+    /// Writes the lowercase name of the context (`sparse` or `narrow`) to
+    /// `output`.
+    fn display_bytes(
+        &self,
+        output: &mut dyn std::io::Write,
+    ) -> std::io::Result<()> {
+        match *self {
+            Self::Sparse => write_bytes!(output, b"sparse"),
+            Self::Narrow => write_bytes!(output, b"narrow"),
+        }
+    }
+}
+
+/// Possible warnings when reading sparse configuration
+///
+/// Warnings do not interrupt the read: they are collected and handed back
+/// to the caller, which decides how to report them.
+#[derive(Debug, derive_more::From)]
+pub enum SparseWarning {
+    /// Warns about improper paths that start with "/"
+    RootWarning {
+        /// Kind of config that was being read.
+        context: SparseConfigContext,
+        /// The offending config line, as stored by `parse_config`.
+        line: Vec<u8>,
+    },
+    /// Warns about a profile missing from the given changelog revision
+    ProfileNotFound { profile: Vec<u8>, rev: Revision },
+    /// A warning raised while parsing the collected patterns; convertible
+    /// from `PatternFileWarning` through the derived `From`.
+    #[from]
+    Pattern(PatternFileWarning),
+}
+
+/// Parsed sparse config
+///
+/// Produced by `parse_config`; the pattern fields are kept as raw bytes.
+#[derive(Debug, Default)]
+pub struct SparseConfig {
+    // Line-separated
+    // Patterns collected from `[include]` sections.
+    includes: Vec<u8>,
+    // Line-separated
+    // Patterns collected from `[exclude]` sections.
+    excludes: Vec<u8>,
+    // Names given on `%include` lines.
+    profiles: HashSet<Vec<u8>>,
+    // Non-fatal problems found while reading the config.
+    warnings: Vec<SparseWarning>,
+}
+
+/// All possible errors when reading sparse config
+#[derive(Debug, derive_more::From)]
+pub enum SparseConfigError {
+    /// Section ordering error raised by `parse_config` when it meets an
+    /// `[include]` header it does not accept; `context` is the kind of
+    /// config that was being read.
+    IncludesAfterExcludes {
+        context: SparseConfigContext,
+    },
+    /// A pattern line was found while no `[include]` or `[exclude]`
+    /// section was open; `line` is that line.
+    EntryOutsideSection {
+        context: SparseConfigContext,
+        line: Vec<u8>,
+    },
+    /// Any lower-level `HgError` (for example from reading files).
+    #[from]
+    HgError(HgError),
+    /// Error raised while turning the collected patterns into matchers.
+    #[from]
+    PatternError(PatternError),
+}
+
+/// Parse sparse config file content.
+///
+/// Each line is trimmed; empty lines and lines starting with `#` are
+/// skipped. `%include <profile>` lines record a profile name, `[include]`
+/// and `[exclude]` open a section, and every other line is a pattern that
+/// is appended to the section currently open.
+///
+/// Patterns starting with `/` are dropped and reported through a
+/// [`SparseWarning::RootWarning`] in the returned config.
+///
+/// # Errors
+///
+/// - [`SparseConfigError::IncludesAfterExcludes`] when an `[include]`
+///   section follows an `[exclude]` section.
+/// - [`SparseConfigError::EntryOutsideSection`] when a pattern appears
+///   before any section header.
+fn parse_config(
+    raw: &[u8],
+    context: SparseConfigContext,
+) -> Result<SparseConfig, SparseConfigError> {
+    let mut includes = vec![];
+    let mut excludes = vec![];
+    let mut profiles = HashSet::new();
+    let mut warnings = vec![];
+
+    // Section the next pattern lines belong to, if any.
+    #[derive(PartialEq, Eq)]
+    enum Current {
+        Includes,
+        Excludes,
+        None,
+    }
+
+    let mut current = Current::None;
+
+    for line in raw.split(|c| *c == b'\n') {
+        let line = line.trim();
+        if line.is_empty() || line[0] == b'#' {
+            // empty or comment line, skip
+            continue;
+        }
+        if line.starts_with(b"%include ") {
+            let profile = line[b"%include ".len()..].trim();
+            if !profile.is_empty() {
+                profiles.insert(profile.into());
+            }
+        } else if line == b"[include]" {
+            // Includes must all come before excludes: reject an
+            // `[include]` header once an `[exclude]` section was opened.
+            // Repeating `[include]` on its own is fine.
+            if current == Current::Excludes {
+                return Err(SparseConfigError::IncludesAfterExcludes {
+                    context,
+                });
+            }
+            current = Current::Includes;
+        } else if line == b"[exclude]" {
+            current = Current::Excludes;
+        } else {
+            let target = match current {
+                Current::Includes => &mut includes,
+                Current::Excludes => &mut excludes,
+                Current::None => {
+                    return Err(SparseConfigError::EntryOutsideSection {
+                        context,
+                        line: line.into(),
+                    });
+                }
+            };
+            // `line` is already trimmed, no need to trim it again.
+            if line.starts_with(b"/") {
+                warnings.push(SparseWarning::RootWarning {
+                    context,
+                    line: line.into(),
+                });
+                continue;
+            }
+            // Every pattern is preceded by a newline, so a non-empty
+            // section always starts with `\n`.
+            target.push(b'\n');
+            target.extend(line.iter());
+        }
+    }
+
+    Ok(SparseConfig {
+        includes,
+        excludes,
+        profiles,
+        warnings,
+    })
+}
+
+/// Read the entries of the `tempsparse` file through `repo.hg_vfs()`.
+///
+/// Returns an empty list when the file is missing or empty; otherwise
+/// returns one entry per `\n`-separated chunk of the file content.
+fn read_temporary_includes(
+    repo: &Repo,
+) -> Result<Vec<Vec<u8>>, SparseConfigError> {
+    let contents = match repo.hg_vfs().try_read("tempsparse")? {
+        Some(contents) if !contents.is_empty() => contents,
+        _ => return Ok(Vec::new()),
+    };
+    let entries = contents
+        .split(|byte| *byte == b'\n')
+        .map(|entry| entry.to_vec())
+        .collect();
+    Ok(entries)
+}
+
+/// Obtain sparse checkout patterns for the given revision
+///
+/// Reads the `sparse` file through `repo.hg_vfs()`, parses it, then reads
+/// every profile it references (transitively) at revision `rev` and merges
+/// the includes, excludes and warnings of each profile into the result.
+///
+/// Returns `Ok(None)` when `repo.has_sparse()` is false or when the
+/// `sparse` file is missing or empty.
+///
+/// A profile that cannot be found in `rev` is skipped and reported with a
+/// [`SparseWarning::ProfileNotFound`] warning.
+///
+/// # Errors
+///
+/// Fails if the `sparse` file cannot be read, if the config or one of the
+/// profiles is malformed, or if `cat` fails.
+fn patterns_for_rev(
+    repo: &Repo,
+    rev: Revision,
+) -> Result<Option<SparseConfig>, SparseConfigError> {
+    if !repo.has_sparse() {
+        return Ok(None);
+    }
+    let raw = repo.hg_vfs().try_read("sparse")?.unwrap_or_default();
+
+    if raw.is_empty() {
+        return Ok(None);
+    }
+
+    let mut config = parse_config(&raw, SparseConfigContext::Sparse)?;
+
+    // Work list of the profiles still to be read. Profiles can include
+    // other profiles, so `visited` guards against reading one twice (and
+    // against include cycles).
+    let mut profiles: Vec<Vec<u8>> =
+        std::mem::take(&mut config.profiles).into_iter().collect();
+    let mut visited = HashSet::new();
+
+    while let Some(profile) = profiles.pop() {
+        if !visited.insert(profile.clone()) {
+            continue;
+        }
+
+        let output = cat(repo, &rev.to_string(), vec![HgPath::new(&profile)])
+            .map_err(|_| {
+                HgError::corrupted(format!(
+                    "dirstate points to non-existent parent node"
+                ))
+            })?;
+        let contents = match output.results.first() {
+            Some(found) => &found.1,
+            None => {
+                // The profile is absent from this revision: warn and move
+                // on to the next one, there is nothing to parse.
+                config.warnings.push(SparseWarning::ProfileNotFound {
+                    profile,
+                    rev,
+                });
+                continue;
+            }
+        };
+
+        let subconfig = parse_config(contents, SparseConfigContext::Sparse)?;
+        if !subconfig.includes.is_empty() {
+            config.includes.push(b'\n');
+            config.includes.extend(&subconfig.includes);
+        }
+        if !subconfig.excludes.is_empty() {
+            config.excludes.push(b'\n');
+            config.excludes.extend(&subconfig.excludes);
+        }
+        config.warnings.extend(subconfig.warnings);
+        profiles.extend(subconfig.profiles);
+    }
+
+    config.profiles = visited;
+
+    if !config.includes.is_empty() {
+        // Whenever something is explicitly included, also keep the
+        // top-level `.hg*` files.
+        config.includes.extend(b"\n.hg*");
+    }
+
+    Ok(Some(config))
+}
+
+/// Obtain a matcher for sparse working directories.
+///
+/// When the repository requirements do not contain `SPARSE_REQUIREMENT`,
+/// an `AlwaysMatcher` is returned. Otherwise one matcher is built for each
+/// non-null dirstate parent (includes minus excludes) and the results are
+/// united. Entries of the `tempsparse` file are always matched on top of
+/// that.
+///
+/// The second element of the returned tuple holds the warnings collected
+/// while reading the configuration; reporting them is up to the caller.
+///
+/// # Errors
+///
+/// Fails if a dirstate parent is unknown to the changelog, or if the
+/// sparse configuration cannot be read or parsed. A malformed
+/// configuration is reported instead of being treated as "match
+/// everything".
+pub fn matcher(
+    repo: &Repo,
+) -> Result<(Box<dyn Matcher + Sync>, Vec<SparseWarning>), SparseConfigError> {
+    let mut warnings = vec![];
+    if !repo.requirements().contains(SPARSE_REQUIREMENT) {
+        return Ok((Box::new(AlwaysMatcher), warnings));
+    }
+
+    let parents = repo.dirstate_parents()?;
+    let mut revs = vec![];
+    let p1_rev =
+        repo.changelog()?
+            .rev_from_node(parents.p1.into())
+            .map_err(|_| {
+                HgError::corrupted(format!(
+                    "dirstate points to non-existent parent node"
+                ))
+            })?;
+    if p1_rev != NULL_REVISION {
+        revs.push(p1_rev)
+    }
+    let p2_rev =
+        repo.changelog()?
+            .rev_from_node(parents.p2.into())
+            .map_err(|_| {
+                HgError::corrupted(format!(
+                    "dirstate points to non-existent parent node"
+                ))
+            })?;
+    if p2_rev != NULL_REVISION {
+        revs.push(p2_rev)
+    }
+    let mut matchers = vec![];
+
+    for rev in revs.iter() {
+        // Propagate read/parse errors: swallowing them would make a broken
+        // config silently behave like a non-sparse checkout.
+        if let Some(config) = patterns_for_rev(repo, *rev)? {
+            warnings.extend(config.warnings);
+            let mut m: Box<dyn Matcher + Sync> = Box::new(AlwaysMatcher);
+            if !config.includes.is_empty() {
+                let (patterns, subwarnings) = parse_pattern_file_contents(
+                    &config.includes,
+                    Path::new(""),
+                    Some(b"relglob:".as_ref()),
+                    false,
+                )?;
+                warnings.extend(subwarnings.into_iter().map(From::from));
+                m = Box::new(IncludeMatcher::new(patterns)?);
+            }
+            if !config.excludes.is_empty() {
+                let (patterns, subwarnings) = parse_pattern_file_contents(
+                    &config.excludes,
+                    Path::new(""),
+                    Some(b"relglob:".as_ref()),
+                    false,
+                )?;
+                warnings.extend(subwarnings.into_iter().map(From::from));
+                // Whatever is included, minus whatever is excluded.
+                m = Box::new(DifferenceMatcher::new(
+                    m,
+                    Box::new(IncludeMatcher::new(patterns)?),
+                ));
+            }
+            matchers.push(m);
+        }
+    }
+    let result: Box<dyn Matcher + Sync> = match matchers.len() {
+        0 => Box::new(AlwaysMatcher),
+        1 => matchers.pop().expect("matchers has exactly one element"),
+        _ => Box::new(UnionMatcher::new(matchers)),
+    };
+
+    let matcher =
+        force_include_matcher(result, &read_temporary_includes(repo)?)?;
+    Ok((matcher, warnings))
+}
+
+/// Wraps `result` so that every path listed in `temp_includes` matches,
+/// whatever `result` itself says about it.
+///
+/// With no temporary include, `result` is handed back untouched.
+fn force_include_matcher(
+    result: Box<dyn Matcher + Sync>,
+    temp_includes: &[Vec<u8>],
+) -> Result<Box<dyn Matcher + Sync>, PatternError> {
+    if temp_includes.is_empty() {
+        return Ok(result);
+    }
+    // One `path:` pattern per temporary include.
+    let forced_patterns = temp_includes
+        .iter()
+        .map(|entry| {
+            IgnorePattern::new(PatternSyntax::Path, entry, Path::new(""))
+        })
+        .collect();
+    let forced = IncludeMatcher::new(forced_patterns)?;
+    Ok(Box::new(UnionMatcher::new(vec![Box::new(forced), result])))
+}
--- a/rust/hg-core/src/vfs.rs	Tue Jul 19 15:37:09 2022 +0200
+++ b/rust/hg-core/src/vfs.rs	Tue Jul 19 15:37:45 2022 +0200
@@ -40,6 +40,23 @@
         std::fs::read(&path).when_reading_file(&path)
     }
 
+    /// Returns `Ok(None)` if the file does not exist.
+    pub fn try_read(
+        &self,
+        relative_path: impl AsRef<Path>,
+    ) -> Result<Option<Vec<u8>>, HgError> {
+        match self.read(relative_path) {
+            Err(e) => match &e {
+                HgError::IoError { error, .. } => match error.kind() {
+                    ErrorKind::NotFound => return Ok(None),
+                    _ => Err(e),
+                },
+                _ => Err(e),
+            },
+            Ok(v) => Ok(Some(v)),
+        }
+    }
+
     fn mmap_open_gen(
         &self,
         relative_path: impl AsRef<Path>,
--- a/rust/rhg/src/commands/status.rs	Tue Jul 19 15:37:09 2022 +0200
+++ b/rust/rhg/src/commands/status.rs	Tue Jul 19 15:37:45 2022 +0200
@@ -18,8 +18,8 @@
 use hg::errors::{HgError, IoResultExt};
 use hg::lock::LockError;
 use hg::manifest::Manifest;
-use hg::matchers::AlwaysMatcher;
 use hg::repo::Repo;
+use hg::sparse::{matcher, SparseWarning};
 use hg::utils::files::get_bytes_from_os_string;
 use hg::utils::files::get_bytes_from_path;
 use hg::utils::files::get_path_from_bytes;
@@ -251,9 +251,9 @@
         };
     }
 
-    if repo.has_sparse() || repo.has_narrow() {
+    if repo.has_narrow() {
         return Err(CommandError::unsupported(
-            "rhg status is not supported for sparse checkouts or narrow clones yet"
+            "rhg status is not supported for narrow clones yet",
         ));
     }
 
@@ -366,9 +366,36 @@
             filesystem_time_at_status_start,
         ))
     };
+    let (matcher, sparse_warnings) = matcher(repo)?;
+
+    for warning in sparse_warnings {
+        match &warning {
+            SparseWarning::RootWarning { context, line } => {
+                let msg = format_bytes!(
+                    b"warning: {} profile cannot use paths \"
+                    starting with /, ignoring {}\n",
+                    context,
+                    line
+                );
+                ui.write_stderr(&msg)?;
+            }
+            SparseWarning::ProfileNotFound { profile, rev } => {
+                let msg = format_bytes!(
+                    b"warning: sparse profile '{}' not found \"
+                    in rev {} - ignoring it\n",
+                    profile,
+                    rev
+                );
+                ui.write_stderr(&msg)?;
+            }
+            SparseWarning::Pattern(e) => {
+                ui.write_stderr(&print_pattern_file_warning(e, &repo))?;
+            }
+        }
+    }
     let (fixup, mut dirstate_write_needed, filesystem_time_at_status_start) =
         dmap.with_status(
-            &AlwaysMatcher,
+            matcher.as_ref(),
             repo.working_directory_path().to_owned(),
             ignore_files(repo, config),
             options,
--- a/rust/rhg/src/error.rs	Tue Jul 19 15:37:09 2022 +0200
+++ b/rust/rhg/src/error.rs	Tue Jul 19 15:37:45 2022 +0200
@@ -8,6 +8,7 @@
 use hg::exit_codes;
 use hg::repo::RepoError;
 use hg::revlog::revlog::RevlogError;
+use hg::sparse::SparseConfigError;
 use hg::utils::files::get_bytes_from_path;
 use hg::{DirstateError, DirstateMapError, StatusError};
 use std::convert::From;
@@ -52,6 +53,18 @@
         }
     }
 
+    pub fn abort_with_exit_code_bytes(
+        message: impl AsRef<[u8]>,
+        detailed_exit_code: exit_codes::ExitCode,
+    ) -> Self {
+        // TODO: use this everywhere it makes sense instead of the string
+        // version.
+        CommandError::Abort {
+            message: message.as_ref().into(),
+            detailed_exit_code,
+        }
+    }
+
     pub fn unsupported(message: impl AsRef<str>) -> Self {
         CommandError::UnsupportedFeature {
             message: utf8_to_local(message.as_ref()).into(),
@@ -212,3 +225,33 @@
         HgError::from(error).into()
     }
 }
+
+impl From<SparseConfigError> for CommandError {
+    /// Maps sparse config errors to command errors: malformed configs
+    /// abort with a config parse error exit code, `HgError`s go through
+    /// their own conversion and pattern errors are reported as an
+    /// unsupported feature.
+    fn from(e: SparseConfigError) -> Self {
+        let message = match e {
+            SparseConfigError::HgError(inner) => return Self::from(inner),
+            SparseConfigError::PatternError(inner) => {
+                return Self::unsupported(inner.to_string());
+            }
+            SparseConfigError::IncludesAfterExcludes { context } => {
+                format_bytes!(
+                    b"{} config cannot have includes after excludes",
+                    context
+                )
+            }
+            SparseConfigError::EntryOutsideSection { context, line } => {
+                format_bytes!(
+                    b"{} config entry outside of section: {}",
+                    context,
+                    &line,
+                )
+            }
+        };
+        Self::abort_with_exit_code_bytes(
+            message,
+            exit_codes::CONFIG_PARSE_ERROR_ABORT,
+        )
+    }
+}
--- a/tests/test-rhg-sparse-narrow.t	Tue Jul 19 15:37:09 2022 +0200
+++ b/tests/test-rhg-sparse-narrow.t	Tue Jul 19 15:37:45 2022 +0200
@@ -92,7 +92,7 @@
   $ touch dir2/q
   $ "$real_hg" status
   $ $NO_FALLBACK rhg --config rhg.status=true status
-  unsupported feature: rhg status is not supported for sparse checkouts or narrow clones yet
+  unsupported feature: rhg status is not supported for narrow clones yet
   [252]
 
 Adding "orphaned" index files: