hg-core: add basic config module
authorRaphaël Gomès <rgomes@octobus.net>
Tue, 29 Dec 2020 10:53:45 +0100
changeset 46187 95d6f31e88db
parent 46186 5f27924a201d
child 46188 945b33a7edfd
hg-core: add basic config module The config module exposes a `Config` struct, unused for now. It only reads the config file local to the repository, but handles all valid patterns and includes/unsets. It is structured in layers instead of erasing by reverse order of precedence, allowing us to transparently know more about the config for debugging purposes, and potentially other things I haven't thought about yet. This change also introduces `format_bytes!` to `hg-core`. Differential Revision: https://phab.mercurial-scm.org/D9408
rust/hg-core/src/config.rs
rust/hg-core/src/config/config.rs
rust/hg-core/src/config/layer.rs
rust/hg-core/src/lib.rs
rust/hg-core/src/utils/files.rs
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-core/src/config.rs	Tue Dec 29 10:53:45 2020 +0100
@@ -0,0 +1,14 @@
+// config.rs
+//
+// Copyright 2020
+//      Valentin Gatien-Baron,
+//      Raphaël Gomès <rgomes@octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+//! Mercurial config parsing and interfaces.
+
+mod config;
+mod layer;
+pub use config::Config;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-core/src/config/config.rs	Tue Dec 29 10:53:45 2020 +0100
@@ -0,0 +1,197 @@
+// config.rs
+//
+// Copyright 2020
+//      Valentin Gatien-Baron,
+//      Raphaël Gomès <rgomes@octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+use super::layer;
+use crate::config::layer::{ConfigError, ConfigLayer, ConfigValue};
+use std::path::PathBuf;
+
+use crate::operations::find_root;
+use crate::utils::files::read_whole_file;
+
+/// Holds the config values for the current repository.
+///
+/// Values are kept as a stack of layers in load order. Lookups walk the
+/// stack from the last layer back to the first, so later layers win.
+/// TODO update this docstring once we support more sources
+pub struct Config {
+    layers: Vec<ConfigLayer>,
+}
+
+/// Dumps every layer under a numbered banner, starting with the layer that
+/// was loaded last (the one lookups consult first).
+impl std::fmt::Debug for Config {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        self.layers.iter().rev().enumerate().try_for_each(
+            |(position, current)| {
+                write!(
+                    f,
+                    "==== Layer {} (trusted: {}) ====\n{:?}",
+                    position, current.trusted, current
+                )
+            },
+        )
+    }
+}
+
+/// Where a piece of configuration is loaded from.
+pub enum ConfigSource {
+    /// Absolute path to a config file, read and parsed at load time
+    AbsPath(PathBuf),
+    /// Already parsed (from the CLI, env, Python resources, etc.), used
+    /// as-is
+    Parsed(ConfigLayer),
+}
+
+/// Interprets `v` as a boolean config value, ignoring ASCII case.
+///
+/// Returns `Some(true)` for `1`, `yes`, `true`, `on` and `always`,
+/// `Some(false)` for `0`, `no`, `false`, `off` and `never`, and `None` for
+/// anything else (surrounding whitespace is not stripped).
+pub fn parse_bool(v: &[u8]) -> Option<bool> {
+    const TRUTHY: &[&[u8]] = &[b"1", b"yes", b"true", b"on", b"always"];
+    const FALSY: &[&[u8]] = &[b"0", b"no", b"false", b"off", b"never"];
+
+    let is_one_of = |words: &[&[u8]]| {
+        words.iter().any(|word| v.eq_ignore_ascii_case(word))
+    };
+
+    if is_one_of(TRUTHY) {
+        Some(true)
+    } else if is_one_of(FALSY) {
+        Some(false)
+    } else {
+        None
+    }
+}
+
+impl Config {
+    /// Builds a config from `sources`, processed in the order given, which
+    /// is also their order of precedence.
+    ///
+    /// Already parsed sources are used as-is. Files are read and parsed,
+    /// each one possibly yielding several layers. A file that cannot be
+    /// read is silently skipped, whereas a file that fails to parse makes
+    /// the whole load fail with the parser's error.
+    pub fn load_from_explicit_sources(
+        sources: Vec<ConfigSource>,
+    ) -> Result<Self, ConfigError> {
+        let mut layers = Vec::new();
+
+        for source in sources {
+            match source {
+                ConfigSource::Parsed(parsed) => layers.push(parsed),
+                ConfigSource::AbsPath(path) => {
+                    // TODO check if it should be trusted
+                    // mercurial/ui.py:427
+                    // Unreadable files are ignored, same as the python code
+                    if let Ok(contents) = read_whole_file(&path) {
+                        layers.extend(ConfigLayer::parse(&path, &contents)?);
+                    }
+                }
+            }
+        }
+
+        Ok(Config { layers })
+    }
+
+    /// Loads the local config. In a future version, this will also load the
+    /// `$HOME/.hgrc` and more to mirror the Python implementation.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `find_root()` fails, since its result is unwrapped.
+    pub fn load() -> Result<Self, ConfigError> {
+        let hgrc = find_root().unwrap().join(".hg/hgrc");
+        Self::load_from_explicit_sources(vec![ConfigSource::AbsPath(hgrc)])
+    }
+
+    /// Looks up `section.item` and interprets it as a boolean.
+    ///
+    /// Returns `Ok(None)` when no trusted layer defines the item,
+    /// `Ok(Some(b))` when the first value found is a valid boolean, and a
+    /// `ConfigError::Parse` pointing at that value otherwise.
+    pub fn get_option(
+        &self,
+        section: &[u8],
+        item: &[u8],
+    ) -> Result<Option<bool>, ConfigError> {
+        let (layer, v) = match self.get_inner(section, item) {
+            Some(found) => found,
+            None => return Ok(None),
+        };
+        parse_bool(&v.bytes)
+            .map(Some)
+            .ok_or_else(|| ConfigError::Parse {
+                origin: layer.origin.to_owned(),
+                line: v.line,
+                bytes: v.bytes.to_owned(),
+            })
+    }
+
+    /// Same as `get_option`, except that a missing value reads as
+    /// `Ok(false)`. An invalid boolean is still an `Err`.
+    pub fn get_bool(
+        &self,
+        section: &[u8],
+        item: &[u8],
+    ) -> Result<bool, ConfigError> {
+        self.get_option(section, item)
+            .map(|found| found.unwrap_or(false))
+    }
+
+    /// Returns the raw value bytes of the first one found, or `None`.
+    pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&[u8]> {
+        let (_, value) = self.get_inner(section, item)?;
+        Some(value.bytes.as_slice())
+    }
+
+    /// Returns the layer and the value of the first one found, or `None`.
+    ///
+    /// Layers are searched from the last one loaded to the first, and
+    /// untrusted layers are skipped entirely.
+    fn get_inner(
+        &self,
+        section: &[u8],
+        item: &[u8],
+    ) -> Option<(&ConfigLayer, &ConfigValue)> {
+        self.layers
+            .iter()
+            .rev()
+            .filter(|layer| layer.trusted)
+            .find_map(|layer| layer.get(section, item).map(|v| (layer, v)))
+    }
+
+    /// Get raw values bytes from all layers (even untrusted ones) in order
+    /// of precedence.
+    #[cfg(test)]
+    fn get_all(&self, section: &[u8], item: &[u8]) -> Vec<&[u8]> {
+        self.layers
+            .iter()
+            .rev()
+            .filter_map(|layer| layer.get(section, item))
+            .map(|v| v.bytes.as_slice())
+            .collect()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use pretty_assertions::assert_eq;
+    use std::fs::File;
+    use std::io::Write;
+
+    /// Values from an `%include`d file rank between what the including
+    /// file sets before the directive and what it sets after it.
+    #[test]
+    fn test_include_layer_ordering() {
+        let scratch = tempfile::tempdir().unwrap();
+        let scratch_dir = scratch.path();
+
+        // Creates `name` inside the scratch directory and returns its path
+        let write_file = |name: &str, contents: &[u8]| {
+            let path = scratch_dir.join(name);
+            File::create(&path).unwrap().write_all(contents).unwrap();
+            path
+        };
+
+        write_file("included.rc", b"[section]\nitem=value1");
+        let base_rc = write_file(
+            "base.rc",
+            b"[section]\nitem=value0\n%include included.rc\nitem=value2",
+        );
+
+        let config = Config::load_from_explicit_sources(vec![
+            ConfigSource::AbsPath(base_rc),
+        ])
+        .expect("expected valid config");
+
+        dbg!(&config);
+
+        // The winning value is the one set after the include, on line 4
+        let expected = ConfigValue {
+            bytes: b"value2".to_vec(),
+            line: Some(4),
+        };
+        let (_, found) = config.get_inner(b"section", b"item").unwrap();
+        assert_eq!(found, &expected);
+
+        assert_eq!(config.get(b"section", b"item").unwrap(), b"value2");
+        assert_eq!(
+            config.get_all(b"section", b"item"),
+            [b"value2", b"value1", b"value0"]
+        );
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-core/src/config/layer.rs	Tue Dec 29 10:53:45 2020 +0100
@@ -0,0 +1,268 @@
+// layer.rs
+//
+// Copyright 2020
+//      Valentin Gatien-Baron,
+//      Raphaël Gomès <rgomes@octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+use crate::utils::files::{
+    get_bytes_from_path, get_path_from_bytes, read_whole_file,
+};
+use format_bytes::format_bytes;
+use lazy_static::lazy_static;
+use regex::bytes::Regex;
+use std::collections::HashMap;
+use std::io;
+use std::path::{Path, PathBuf};
+
+// Line-level patterns used by `ConfigLayer::parse`. Each one is matched
+// against a single line of the file (the data is split on `\n`).
+lazy_static! {
+    /// A section header such as `[ui]`; group 1 is the section name.
+    static ref SECTION_RE: Regex = make_regex(r"^\[([^\[]+)\]");
+    /// An `item = value` line; group 1 is the item name and group 2 the
+    /// value with surrounding whitespace stripped (possibly empty).
+    static ref ITEM_RE: Regex = make_regex(r"^([^=\s][^=]*?)\s*=\s*((.*\S)?)");
+    /// Continuation whitespace: a line starting with whitespace followed by
+    /// some content; group 1 is that content without the surrounding
+    /// whitespace.
+    static ref CONT_RE: Regex = make_regex(r"^\s+(\S|\S.*\S)\s*$");
+    /// A line carrying no data: a `;` or `#` comment, or a line that is
+    /// empty or only whitespace.
+    static ref EMPTY_RE: Regex = make_regex(r"^(;|#|\s*$)");
+    /// A comment line, i.e. one starting with `;` or `#`.
+    static ref COMMENT_RE: Regex = make_regex(r"^(;|#)");
+    /// A directive that allows for removing previous entries; group 1 is
+    /// the name of the item to remove.
+    static ref UNSET_RE: Regex = make_regex(r"^%unset\s+(\S+)");
+    /// A directive that allows for including other config files; group 1
+    /// is the path to include, without the surrounding whitespace.
+    static ref INCLUDE_RE: Regex = make_regex(r"^%include\s+(\S|\S.*\S)\s*$");
+}
+
+/// All config values separated by layers of precedence.
+/// Each config source may be split in multiple layers if `%include` directives
+/// are used: the including file is cut at each directive, and the layers of
+/// the included file are inserted at that point.
+/// TODO detail the general precedence
+#[derive(Clone)]
+pub struct ConfigLayer {
+    /// Mapping of the sections to their items
+    sections: HashMap<Vec<u8>, ConfigItem>,
+    /// All sections (and their items/values) in a layer share the same origin
+    pub origin: ConfigOrigin,
+    /// Whether this layer comes from a trusted user or group.
+    /// Untrusted layers are ignored by regular `Config` lookups.
+    pub trusted: bool,
+}
+
+impl ConfigLayer {
+    /// Creates an empty layer whose values all come from `origin`.
+    ///
+    /// The layer is currently always marked as trusted.
+    pub fn new(origin: ConfigOrigin) -> Self {
+        ConfigLayer {
+            sections: HashMap::new(),
+            trusted: true, // TODO check
+            origin,
+        }
+    }
+
+    /// Add an entry to the config, overwriting the old one if already present.
+    ///
+    /// `line` is the 1-indexed line the value was read from, if it comes
+    /// from a file.
+    pub fn add(
+        &mut self,
+        section: Vec<u8>,
+        item: Vec<u8>,
+        value: Vec<u8>,
+        line: Option<usize>,
+    ) {
+        self.sections
+            .entry(section)
+            .or_default()
+            .insert(item, ConfigValue { bytes: value, line });
+    }
+
+    /// Returns the config value in `<section>.<item>` if it exists
+    pub fn get(&self, section: &[u8], item: &[u8]) -> Option<&ConfigValue> {
+        self.sections.get(section)?.get(item)
+    }
+
+    /// Returns `true` if no section has been added to this layer.
+    pub fn is_empty(&self) -> bool {
+        self.sections.is_empty()
+    }
+
+    /// Returns a `Vec` of layers in order of precedence (so, in read order),
+    /// recursively parsing the `%include` directives if any.
+    ///
+    /// `src` is the path `data` was read from. It is recorded as the origin
+    /// of the values and used to resolve relative `%include` paths.
+    ///
+    /// # Errors
+    ///
+    /// - `ConfigError::IncludeError` if an included file cannot be read
+    /// - `ConfigError::Parse` if a line is not a section header, an item, a
+    ///   directive, a comment or blank
+    pub fn parse(src: &Path, data: &[u8]) -> Result<Vec<Self>, ConfigError> {
+        let mut layers = vec![];
+
+        // Discard byte order mark if any
+        let data = if data.starts_with(b"\xef\xbb\xbf") {
+            &data[3..]
+        } else {
+            data
+        };
+
+        // TODO check if it's trusted
+        let mut current_layer = Self::new(ConfigOrigin::File(src.to_owned()));
+
+        let mut lines_iter =
+            data.split(|b| *b == b'\n').enumerate().peekable();
+        let mut section = b"".to_vec();
+
+        // Not a `for` loop: continuation lines are consumed from inside the
+        // body through `peek`/`next`.
+        while let Some((index, bytes)) = lines_iter.next() {
+            if let Some(m) = INCLUDE_RE.captures(&bytes) {
+                let filename_bytes = &m[1];
+                let filename_to_include = get_path_from_bytes(&filename_bytes);
+                match read_include(&src, &filename_to_include) {
+                    (include_src, Ok(data)) => {
+                        // Close the layer built so far, splice in the
+                        // included layers, then start a fresh layer for the
+                        // rest of this file.
+                        layers.push(current_layer);
+                        layers.extend(Self::parse(&include_src, &data)?);
+                        current_layer =
+                            Self::new(ConfigOrigin::File(src.to_owned()));
+                    }
+                    (_, Err(e)) => {
+                        return Err(ConfigError::IncludeError {
+                            path: filename_to_include.to_owned(),
+                            io_error: e,
+                        })
+                    }
+                }
+            } else if EMPTY_RE.is_match(&bytes) {
+                // Comment or blank line: nothing to record
+            } else if let Some(m) = SECTION_RE.captures(&bytes) {
+                section = m[1].to_vec();
+            } else if let Some(m) = ITEM_RE.captures(&bytes) {
+                let item = m[1].to_vec();
+                let mut value = m[2].to_vec();
+                // Gather the continuation lines that follow the item
+                while let Some((_, next_line)) = lines_iter.peek() {
+                    if COMMENT_RE.is_match(next_line) {
+                        // Comments inside a continued value are skipped
+                    } else if let Some(cont) = CONT_RE.captures(next_line) {
+                        // Append the content of the continuation line
+                        // itself, not a capture of the `item = value` line.
+                        value.extend(b"\n");
+                        value.extend(&cont[1]);
+                    } else {
+                        break;
+                    }
+                    lines_iter.next();
+                }
+                // The recorded line is the one the item starts on
+                current_layer.add(
+                    section.clone(),
+                    item,
+                    value,
+                    Some(index + 1),
+                );
+            } else if let Some(m) = UNSET_RE.captures(&bytes) {
+                // Only affects entries of the layer currently being built
+                if let Some(map) = current_layer.sections.get_mut(&section) {
+                    map.remove(&m[1]);
+                }
+            } else {
+                return Err(ConfigError::Parse {
+                    origin: ConfigOrigin::File(src.to_owned()),
+                    line: Some(index + 1),
+                    bytes: bytes.to_owned(),
+                });
+            }
+        }
+        if !current_layer.is_empty() {
+            layers.push(current_layer);
+        }
+        Ok(layers)
+    }
+}
+
+/// Prints one `section.item=value # origin` line per entry, sorted by
+/// section then by item so that the output is stable.
+impl std::fmt::Debug for ConfigLayer {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let mut by_section: Vec<_> = self.sections.iter().collect();
+        by_section.sort_unstable_by(|(a, _), (b, _)| a.cmp(b));
+
+        for (section, entries) in by_section {
+            let mut by_item: Vec<_> = entries.iter().collect();
+            by_item.sort_unstable_by(|(a, _), (b, _)| a.cmp(b));
+
+            for (item, config_entry) in by_item {
+                let line = format_bytes!(
+                    b"{}.{}={} # {}",
+                    section,
+                    item,
+                    &config_entry.bytes,
+                    &self.origin.to_bytes(),
+                );
+                writeln!(f, "{}", String::from_utf8_lossy(&line))?;
+            }
+        }
+        Ok(())
+    }
+}
+
+/// Mapping of section item to value, i.e. the content of one section.
+/// In the following:
+/// ```text
+/// [ui]
+/// paginate=no
+/// ```
+/// "paginate" is the section item and "no" the value.
+pub type ConfigItem = HashMap<Vec<u8>, ConfigValue>;
+
+/// A single config value along with the line it was defined on, if any.
+#[derive(Clone, Debug, PartialEq)]
+pub struct ConfigValue {
+    /// The raw bytes of the value (be it from the CLI, env or from a file)
+    pub bytes: Vec<u8>,
+    /// Only present if the value comes from a file, 1-indexed.
+    pub line: Option<usize>,
+}
+
+/// Where the values of a layer come from.
+#[derive(Clone, Debug)]
+pub enum ConfigOrigin {
+    /// The value comes from a configuration file
+    File(PathBuf),
+    /// The value comes from the environment like `$PAGER` or `$EDITOR`
+    Environment(Vec<u8>),
+    /* TODO cli
+     * TODO defaults (configitems.py)
+     * TODO extensions
+     * TODO Python resources?
+     * Others? */
+}
+
+impl ConfigOrigin {
+    /// Byte representation of the origin: the path converted to bytes for
+    /// a file, a copy of the stored bytes for the environment.
+    /// TODO use some kind of dedicated trait?
+    pub fn to_bytes(&self) -> Vec<u8> {
+        match self {
+            Self::Environment(name) => name.clone(),
+            Self::File(path) => get_bytes_from_path(path),
+        }
+    }
+}
+
+/// Errors that can occur while loading or querying the config.
+#[derive(Debug)]
+pub enum ConfigError {
+    /// A line of a config file or a config value could not be interpreted
+    Parse {
+        /// Where the offending content comes from
+        origin: ConfigOrigin,
+        /// 1-indexed line of the offending content, when known
+        line: Option<usize>,
+        /// The raw bytes that could not be interpreted
+        bytes: Vec<u8>,
+    },
+    /// Failed to include a sub config file
+    IncludeError {
+        /// The path as written in the `%include` directive
+        path: PathBuf,
+        /// The error raised while reading the included file
+        io_error: std::io::Error,
+    },
+    /// Any IO error that isn't expected
+    IO(std::io::Error),
+}
+
+/// Lets `?` turn any plain IO error into `ConfigError::IO`.
+impl From<std::io::Error> for ConfigError {
+    fn from(io_error: std::io::Error) -> Self {
+        ConfigError::IO(io_error)
+    }
+}
+
+/// Compiles `pattern`, panicking if it is not a valid regex. Only meant for
+/// the hard-coded patterns of this module.
+fn make_regex(pattern: &'static str) -> Regex {
+    match Regex::new(pattern) {
+        Ok(compiled) => compiled,
+        Err(err) => panic!("expected a valid regex: {:?}", err),
+    }
+}
+
+/// Reads the file targeted by an `%include` directive found in `old_src`.
+///
+/// An absolute `new_src` is used as-is, otherwise it is resolved against
+/// the directory containing `old_src`. Returns the resolved path along with
+/// the outcome of reading it.
+///
+/// Panics if `new_src` is relative and `old_src` has no parent.
+fn read_include(
+    old_src: &Path,
+    new_src: &Path,
+) -> (PathBuf, io::Result<Vec<u8>>) {
+    let target = if new_src.is_absolute() {
+        new_src.to_path_buf()
+    } else {
+        old_src.parent().unwrap().join(new_src)
+    };
+    let contents = read_whole_file(&target);
+    (target, contents)
+}
--- a/rust/hg-core/src/lib.rs	Mon Dec 14 12:08:56 2020 +0100
+++ b/rust/hg-core/src/lib.rs	Tue Dec 29 10:53:45 2020 +0100
@@ -26,6 +26,7 @@
 pub mod repo;
 pub mod revlog;
 pub use revlog::*;
+pub mod config;
 pub mod operations;
 pub mod utils;
 
--- a/rust/hg-core/src/utils/files.rs	Mon Dec 14 12:08:56 2020 +0100
+++ b/rust/hg-core/src/utils/files.rs	Tue Dec 29 10:53:45 2020 +0100
@@ -18,6 +18,7 @@
 use same_file::is_same_file;
 use std::borrow::{Cow, ToOwned};
 use std::fs::Metadata;
+use std::io::Read;
 use std::iter::FusedIterator;
 use std::ops::Deref;
 use std::path::{Path, PathBuf};
@@ -308,6 +309,17 @@
     }
 }
 
+/// Reads the whole content of the file at `filepath` into memory.
+///
+/// The size reported by the file's metadata is only used to pre-allocate
+/// the buffer. The file is then read until end-of-file, so the result is
+/// correct even if the reported size is wrong or unavailable.
+///
+/// # Errors
+///
+/// Returns the underlying IO error if the file cannot be opened or read.
+pub fn read_whole_file(filepath: &Path) -> std::io::Result<Vec<u8>> {
+    let mut file = std::fs::File::open(filepath)?;
+    // Best-effort capacity hint: failing to get it must not fail the read,
+    // and a truncating cast only makes the hint smaller.
+    let size_hint = file.metadata().map(|m| m.len() as usize).unwrap_or(0);
+
+    let mut res = Vec::with_capacity(size_hint);
+    file.read_to_end(&mut res)?;
+
+    Ok(res)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;