rust/hg-core/src/sparse.rs
changeset 49485 ffd4b1f1c9cb
child 49488 7c93e38a0bbd
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-core/src/sparse.rs	Tue Jul 19 15:37:45 2022 +0200
@@ -0,0 +1,333 @@
+use std::{collections::HashSet, path::Path};
+
+use format_bytes::{write_bytes, DisplayBytes};
+
+use crate::{
+    errors::HgError,
+    filepatterns::parse_pattern_file_contents,
+    matchers::{
+        AlwaysMatcher, DifferenceMatcher, IncludeMatcher, Matcher,
+        UnionMatcher,
+    },
+    operations::cat,
+    repo::Repo,
+    requirements::SPARSE_REQUIREMENT,
+    utils::{hg_path::HgPath, SliceExt},
+    IgnorePattern, PatternError, PatternFileWarning, PatternSyntax, Revision,
+    NULL_REVISION,
+};
+
+/// Identifies the command on whose behalf a config is being read, so that
+/// diagnostics can name it.
+#[derive(Clone, Copy, Debug)]
+pub enum SparseConfigContext {
+    /// The config read was triggered by `sparse`
+    Sparse,
+    /// The config read was triggered by `narrow`
+    Narrow,
+}
+
+/// Byte rendering of the context: the lowercase name of the variant.
+impl DisplayBytes for SparseConfigContext {
+    fn display_bytes(
+        &self,
+        output: &mut dyn std::io::Write,
+    ) -> std::io::Result<()> {
+        match *self {
+            Self::Sparse => write_bytes!(output, b"sparse"),
+            Self::Narrow => write_bytes!(output, b"narrow"),
+        }
+    }
+}
+
+/// Non-fatal problems that can be reported while reading a sparse
+/// configuration
+#[derive(Debug, derive_more::From)]
+pub enum SparseWarning {
+    /// An improper path that starts with "/" was found
+    RootWarning {
+        /// Which kind of config was being read
+        context: SparseConfigContext,
+        /// The offending line
+        line: Vec<u8>,
+    },
+    /// A profile is missing from the given changelog revision
+    ProfileNotFound {
+        /// Name of the profile that could not be found
+        profile: Vec<u8>,
+        /// Changelog revision in which it was looked up
+        rev: Revision,
+    },
+    /// A warning produced while handling a pattern file
+    #[from]
+    Pattern(PatternFileWarning),
+}
+
+/// Outcome of parsing a sparse config
+#[derive(Debug, Default)]
+pub struct SparseConfig {
+    /// Include patterns, line-separated
+    includes: Vec<u8>,
+    /// Exclude patterns, line-separated
+    excludes: Vec<u8>,
+    /// Names of the profiles this config refers to
+    profiles: HashSet<Vec<u8>>,
+    /// Warnings collected while reading the config
+    warnings: Vec<SparseWarning>,
+}
+
+/// Every way in which reading a sparse config can fail
+#[derive(Debug, derive_more::From)]
+pub enum SparseConfigError {
+    /// An `[include]` section header was rejected by the parser
+    IncludesAfterExcludes {
+        /// Which kind of config was being read
+        context: SparseConfigContext,
+    },
+    /// A pattern line was found before any section header
+    EntryOutsideSection {
+        /// Which kind of config was being read
+        context: SparseConfigContext,
+        /// The offending line
+        line: Vec<u8>,
+    },
+    /// Any other Mercurial error, converted automatically
+    #[from]
+    HgError(HgError),
+    /// A pattern-related error, converted automatically
+    #[from]
+    PatternError(PatternError),
+}
+
+/// Parse the contents of a sparse config file.
+///
+/// Blank lines and lines starting with `#` are skipped. A `%include <name>`
+/// line records `<name>` as a profile, while `[include]` and `[exclude]`
+/// select the section that the following pattern lines are added to.
+/// Each accepted pattern is appended to its section preceded by a `\n`.
+///
+/// A pattern starting with `/` is dropped and reported through a
+/// [`SparseWarning::RootWarning`] in the returned config.
+///
+/// # Errors
+///
+/// - [`SparseConfigError::IncludesAfterExcludes`] if an `[include]` header
+///   follows an `[exclude]` header
+/// - [`SparseConfigError::EntryOutsideSection`] if a pattern line appears
+///   before any section header
+fn parse_config(
+    raw: &[u8],
+    context: SparseConfigContext,
+) -> Result<SparseConfig, SparseConfigError> {
+    /// Section that pattern lines are currently added to
+    #[derive(Clone, Copy, PartialEq, Eq)]
+    enum Section {
+        Includes,
+        Excludes,
+    }
+
+    let mut config = SparseConfig::default();
+    // `None` until the first section header has been seen
+    let mut current: Option<Section> = None;
+
+    for line in raw.split(|c| *c == b'\n') {
+        let line = line.trim();
+        if line.is_empty() || line[0] == b'#' {
+            // empty or comment line, skip
+            continue;
+        }
+        if line.starts_with(b"%include ") {
+            let profile = line[b"%include ".len()..].trim();
+            if !profile.is_empty() {
+                config.profiles.insert(profile.into());
+            }
+        } else if line == b"[include]" {
+            // All includes must come before the excludes
+            if current == Some(Section::Excludes) {
+                return Err(SparseConfigError::IncludesAfterExcludes {
+                    context,
+                });
+            }
+            current = Some(Section::Includes);
+        } else if line == b"[exclude]" {
+            current = Some(Section::Excludes);
+        } else {
+            let section = match current {
+                Some(section) => section,
+                None => {
+                    return Err(SparseConfigError::EntryOutsideSection {
+                        context,
+                        line: line.into(),
+                    })
+                }
+            };
+            if line.starts_with(b"/") {
+                // Rooted paths are not supported: warn and ignore the line
+                config.warnings.push(SparseWarning::RootWarning {
+                    context,
+                    line: line.into(),
+                });
+                continue;
+            }
+            let patterns = match section {
+                Section::Includes => &mut config.includes,
+                Section::Excludes => &mut config.excludes,
+            };
+            patterns.push(b'\n');
+            patterns.extend_from_slice(line);
+        }
+    }
+
+    Ok(config)
+}
+
+/// Read the `tempsparse` file from the repository's `.hg` vfs and return
+/// its lines.
+///
+/// A missing or empty file yields an empty list.
+fn read_temporary_includes(
+    repo: &Repo,
+) -> Result<Vec<Vec<u8>>, SparseConfigError> {
+    let raw = repo.hg_vfs().try_read("tempsparse")?.unwrap_or_default();
+    let includes = if raw.is_empty() {
+        vec![]
+    } else {
+        raw.split(|byte| *byte == b'\n')
+            .map(|entry| entry.to_vec())
+            .collect()
+    };
+    Ok(includes)
+}
+
+/// Obtain sparse checkout patterns for the given revision.
+///
+/// Reads the `sparse` file of the repository, then resolves every profile
+/// it refers to (transitively) by reading the profile's contents at `rev`
+/// and merging its includes, excludes and warnings into the result.
+///
+/// Returns `Ok(None)` if the repository is not sparse or if the `sparse`
+/// file is missing or empty. A profile that cannot be found at `rev` is
+/// reported as a [`SparseWarning::ProfileNotFound`] and skipped.
+///
+/// # Errors
+///
+/// Fails if a config or profile cannot be parsed, or if reading from the
+/// repository fails.
+fn patterns_for_rev(
+    repo: &Repo,
+    rev: Revision,
+) -> Result<Option<SparseConfig>, SparseConfigError> {
+    if !repo.has_sparse() {
+        return Ok(None);
+    }
+    let raw = repo.hg_vfs().try_read("sparse")?.unwrap_or_default();
+
+    if raw.is_empty() {
+        return Ok(None);
+    }
+
+    let mut config = parse_config(&raw, SparseConfigContext::Sparse)?;
+
+    if !config.profiles.is_empty() {
+        // Work list of profiles that still have to be resolved
+        let mut pending: Vec<Vec<u8>> =
+            std::mem::take(&mut config.profiles).into_iter().collect();
+        // Guards against profiles including each other in a cycle
+        let mut visited = HashSet::new();
+
+        while let Some(profile) = pending.pop() {
+            if visited.contains(&profile) {
+                continue;
+            }
+            visited.insert(profile.clone());
+
+            let output =
+                cat(repo, &rev.to_string(), vec![HgPath::new(&profile)])
+                    .map_err(|_| {
+                        HgError::corrupted(
+                            "dirstate points to non-existent parent node"
+                                .to_string(),
+                        )
+                    })?;
+            let contents = match output.results.first() {
+                Some(entry) => &entry.1,
+                None => {
+                    // Nothing to parse for a missing profile: warn and
+                    // move on to the next one
+                    config.warnings.push(SparseWarning::ProfileNotFound {
+                        profile,
+                        rev,
+                    });
+                    continue;
+                }
+            };
+
+            let subconfig =
+                parse_config(contents, SparseConfigContext::Sparse)?;
+            if !subconfig.includes.is_empty() {
+                config.includes.push(b'\n');
+                config.includes.extend(&subconfig.includes);
+            }
+            if !subconfig.excludes.is_empty() {
+                config.excludes.push(b'\n');
+                config.excludes.extend(&subconfig.excludes);
+            }
+            config.warnings.extend(subconfig.warnings);
+            pending.extend(subconfig.profiles);
+        }
+
+        config.profiles = visited;
+    }
+
+    if !config.includes.is_empty() {
+        // Whenever something is included, also include `.hg*`
+        config.includes.extend(b"\n.hg*");
+    }
+
+    Ok(Some(config))
+}
+
+/// Obtain a matcher for sparse working directories.
+///
+/// If the repository does not have the sparse requirement, everything
+/// matches. Otherwise a matcher is built from the sparse patterns of each
+/// non-null dirstate parent (includes minus excludes), the per-parent
+/// matchers are unioned, and the temporary includes are forced on top.
+///
+/// Returns the matcher along with the warnings gathered on the way.
+///
+/// # Errors
+///
+/// Fails if a dirstate parent cannot be resolved to a revision, or if
+/// reading or parsing the sparse configuration or its patterns fails.
+pub fn matcher(
+    repo: &Repo,
+) -> Result<(Box<dyn Matcher + Sync>, Vec<SparseWarning>), SparseConfigError> {
+    /// Error for a dirstate parent that is unknown to the changelog
+    fn missing_parent() -> HgError {
+        HgError::corrupted(
+            "dirstate points to non-existent parent node".to_string(),
+        )
+    }
+
+    let mut warnings = vec![];
+    if !repo.requirements().contains(SPARSE_REQUIREMENT) {
+        return Ok((Box::new(AlwaysMatcher), warnings));
+    }
+
+    let parents = repo.dirstate_parents()?;
+    let mut revs = vec![];
+    let p1_rev = repo
+        .changelog()?
+        .rev_from_node(parents.p1.into())
+        .map_err(|_| missing_parent())?;
+    if p1_rev != NULL_REVISION {
+        revs.push(p1_rev)
+    }
+    let p2_rev = repo
+        .changelog()?
+        .rev_from_node(parents.p2.into())
+        .map_err(|_| missing_parent())?;
+    if p2_rev != NULL_REVISION {
+        revs.push(p2_rev)
+    }
+    let mut matchers = vec![];
+
+    for rev in revs {
+        // A broken sparse config must be reported, not silently treated
+        // as "no config", which would make everything match.
+        let config = match patterns_for_rev(repo, rev)? {
+            Some(config) => config,
+            None => continue,
+        };
+        warnings.extend(config.warnings);
+        let mut m: Box<dyn Matcher + Sync> = Box::new(AlwaysMatcher);
+        if !config.includes.is_empty() {
+            let (patterns, subwarnings) = parse_pattern_file_contents(
+                &config.includes,
+                Path::new(""),
+                Some(b"relglob:".as_ref()),
+                false,
+            )?;
+            warnings.extend(subwarnings.into_iter().map(From::from));
+            m = Box::new(IncludeMatcher::new(patterns)?);
+        }
+        if !config.excludes.is_empty() {
+            let (patterns, subwarnings) = parse_pattern_file_contents(
+                &config.excludes,
+                Path::new(""),
+                Some(b"relglob:".as_ref()),
+                false,
+            )?;
+            warnings.extend(subwarnings.into_iter().map(From::from));
+            // Whatever is excluded is carved out of what matched so far
+            m = Box::new(DifferenceMatcher::new(
+                m,
+                Box::new(IncludeMatcher::new(patterns)?),
+            ));
+        }
+        matchers.push(m);
+    }
+    let result: Box<dyn Matcher + Sync> = match matchers.len() {
+        0 => Box::new(AlwaysMatcher),
+        1 => matchers.pop().expect("matchers has exactly one element"),
+        _ => Box::new(UnionMatcher::new(matchers)),
+    };
+
+    let matcher =
+        force_include_matcher(result, &read_temporary_includes(repo)?)?;
+    Ok((matcher, warnings))
+}
+
+/// Wraps `result` so that any of the forced includes matches as well.
+///
+/// With no forced includes, `result` is handed back untouched. Otherwise
+/// each entry of `temp_includes` becomes a `Path` pattern, and the returned
+/// matcher is the union of those patterns and `result`.
+fn force_include_matcher(
+    result: Box<dyn Matcher + Sync>,
+    temp_includes: &[Vec<u8>],
+) -> Result<Box<dyn Matcher + Sync>, PatternError> {
+    if temp_includes.is_empty() {
+        return Ok(result);
+    }
+    let forced_patterns = temp_includes
+        .iter()
+        .map(|include| {
+            IgnorePattern::new(PatternSyntax::Path, include, Path::new(""))
+        })
+        .collect();
+    let forced_include_matcher = IncludeMatcher::new(forced_patterns)?;
+    let union = UnionMatcher::new(vec![
+        Box::new(forced_include_matcher),
+        result,
+    ]);
+    Ok(Box::new(union))
+}