rhg: Add support for dirstate-v2
author Simon Sapin <simon.sapin@octobus.net>
Tue, 25 May 2021 09:20:30 +0200
changeset 47374 bd88b6bfd8da
parent 47373 d2fb8b4adcc3
child 47375 8125bcd28a5c
rhg: Add support for dirstate-v2

Differential Revision: https://phab.mercurial-scm.org/D10804
rust/hg-core/src/dirstate.rs
rust/hg-core/src/dirstate_tree.rs
rust/hg-core/src/dirstate_tree/on_disk.rs
rust/hg-core/src/operations/list_tracked_files.rs
rust/hg-core/src/repo.rs
rust/hg-core/src/requirements.rs
rust/rhg/src/commands/status.rs
--- a/rust/hg-core/src/dirstate.rs	Wed May 26 11:53:37 2021 +0200
+++ b/rust/hg-core/src/dirstate.rs	Tue May 25 09:20:30 2021 +0200
@@ -7,6 +7,7 @@
 
 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
 use crate::errors::HgError;
+use crate::revlog::node::NULL_NODE;
 use crate::revlog::Node;
 use crate::utils::hg_path::{HgPath, HgPathBuf};
 use crate::FastHashMap;
@@ -25,6 +26,13 @@
     pub p2: Node,
 }
 
+impl DirstateParents {
+    pub const NULL: Self = Self {
+        p1: NULL_NODE,
+        p2: NULL_NODE,
+    };
+}
+
 /// The C implementation uses all signed types. This will be an issue
 /// either when 4GB+ source files are commonplace or in 2038, whichever
 /// comes first.
--- a/rust/hg-core/src/dirstate_tree.rs	Wed May 26 11:53:37 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree.rs	Tue May 25 09:20:30 2021 +0200
@@ -2,4 +2,4 @@
 pub mod dispatch;
 pub mod on_disk;
 pub mod path_with_basename;
-mod status;
+pub mod status;
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs	Wed May 26 11:53:37 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs	Tue May 25 09:20:30 2021 +0200
@@ -167,6 +167,16 @@
     }
 }
 
+fn read_header(on_disk: &[u8]) -> Result<&Header, DirstateV2ParseError> {
+    let (header, _) =
+        Header::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
+    if header.marker == *V2_FORMAT_MARKER {
+        Ok(header)
+    } else {
+        Err(DirstateV2ParseError)
+    }
+}
+
 pub(super) fn read<'on_disk>(
     on_disk: &'on_disk [u8],
 ) -> Result<
@@ -176,27 +186,19 @@
     if on_disk.is_empty() {
         return Ok((DirstateMap::empty(on_disk), None));
     }
-    let (header, _) =
-        Header::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
-    let Header {
-        marker,
-        parents,
-        root,
-        nodes_with_entry_count,
-        nodes_with_copy_source_count,
-    } = header;
-    if marker != V2_FORMAT_MARKER {
-        return Err(DirstateV2ParseError);
-    }
+    let header = read_header(on_disk)?;
     let dirstate_map = DirstateMap {
         on_disk,
         root: dirstate_map::ChildNodes::OnDisk(read_slice::<Node>(
-            on_disk, *root,
+            on_disk,
+            header.root,
         )?),
-        nodes_with_entry_count: nodes_with_entry_count.get(),
-        nodes_with_copy_source_count: nodes_with_copy_source_count.get(),
+        nodes_with_entry_count: header.nodes_with_entry_count.get(),
+        nodes_with_copy_source_count: header
+            .nodes_with_copy_source_count
+            .get(),
     };
-    let parents = Some(parents.clone());
+    let parents = Some(header.parents.clone());
     Ok((dirstate_map, parents))
 }
 
@@ -414,6 +416,35 @@
         .ok_or_else(|| DirstateV2ParseError)
 }
 
+pub(crate) fn parse_dirstate_parents(
+    on_disk: &[u8],
+) -> Result<&DirstateParents, HgError> {
+    Ok(&read_header(on_disk)?.parents)
+}
+
+pub(crate) fn for_each_tracked_path<'on_disk>(
+    on_disk: &'on_disk [u8],
+    mut f: impl FnMut(&'on_disk HgPath),
+) -> Result<(), DirstateV2ParseError> {
+    let header = read_header(on_disk)?;
+    fn recur<'on_disk>(
+        on_disk: &'on_disk [u8],
+        nodes: Slice,
+        f: &mut impl FnMut(&'on_disk HgPath),
+    ) -> Result<(), DirstateV2ParseError> {
+        for node in read_slice::<Node>(on_disk, nodes)? {
+            if let Some(state) = node.state()? {
+                if state.is_tracked() {
+                    f(node.full_path(on_disk)?)
+                }
+            }
+            recur(on_disk, node.children, f)?
+        }
+        Ok(())
+    }
+    recur(on_disk, header.root, &mut f)
+}
+
 pub(super) fn write(
     dirstate_map: &mut DirstateMap,
     parents: DirstateParents,
--- a/rust/hg-core/src/operations/list_tracked_files.rs	Wed May 26 11:53:37 2021 +0200
+++ b/rust/hg-core/src/operations/list_tracked_files.rs	Tue May 25 09:20:30 2021 +0200
@@ -6,6 +6,7 @@
 // GNU General Public License version 2 or any later version.
 
 use crate::dirstate::parsers::parse_dirstate_entries;
+use crate::dirstate_tree::on_disk::for_each_tracked_path;
 use crate::errors::HgError;
 use crate::repo::Repo;
 use crate::revlog::changelog::Changelog;
@@ -13,6 +14,7 @@
 use crate::revlog::node::Node;
 use crate::revlog::revlog::RevlogError;
 use crate::utils::hg_path::HgPath;
+use crate::DirstateError;
 use rayon::prelude::*;
 
 /// List files under Mercurial control in the working directory
@@ -20,25 +22,34 @@
 pub struct Dirstate {
     /// The `dirstate` content.
     content: Vec<u8>,
+    dirstate_v2: bool,
 }
 
 impl Dirstate {
     pub fn new(repo: &Repo) -> Result<Self, HgError> {
-        let content = repo.hg_vfs().read("dirstate")?;
-        Ok(Self { content })
+        Ok(Self {
+            content: repo.hg_vfs().read("dirstate")?,
+            dirstate_v2: repo.has_dirstate_v2(),
+        })
     }
 
-    pub fn tracked_files(&self) -> Result<Vec<&HgPath>, HgError> {
+    pub fn tracked_files(&self) -> Result<Vec<&HgPath>, DirstateError> {
         let mut files = Vec::new();
-        let _parents = parse_dirstate_entries(
-            &self.content,
-            |path, entry, _copy_source| {
-                if entry.state.is_tracked() {
-                    files.push(path)
-                }
-                Ok(())
-            },
-        )?;
+        if !self.content.is_empty() {
+            if self.dirstate_v2 {
+                for_each_tracked_path(&self.content, |path| files.push(path))?
+            } else {
+                let _parents = parse_dirstate_entries(
+                    &self.content,
+                    |path, entry, _copy_source| {
+                        if entry.state.is_tracked() {
+                            files.push(path)
+                        }
+                        Ok(())
+                    },
+                )?;
+            }
+        }
         files.par_sort_unstable();
         Ok(files)
     }
--- a/rust/hg-core/src/repo.rs	Wed May 26 11:53:37 2021 +0200
+++ b/rust/hg-core/src/repo.rs	Tue May 25 09:20:30 2021 +0200
@@ -218,12 +218,23 @@
         }
     }
 
+    pub fn has_dirstate_v2(&self) -> bool {
+        self.requirements
+            .contains(requirements::DIRSTATE_V2_REQUIREMENT)
+    }
+
     pub fn dirstate_parents(
         &self,
     ) -> Result<crate::dirstate::DirstateParents, HgError> {
         let dirstate = self.hg_vfs().mmap_open("dirstate")?;
-        let parents =
-            crate::dirstate::parsers::parse_dirstate_parents(&dirstate)?;
+        if dirstate.is_empty() {
+            return Ok(crate::dirstate::DirstateParents::NULL);
+        }
+        let parents = if self.has_dirstate_v2() {
+            crate::dirstate_tree::on_disk::parse_dirstate_parents(&dirstate)?
+        } else {
+            crate::dirstate::parsers::parse_dirstate_parents(&dirstate)?
+        };
         Ok(parents.clone())
     }
 }
--- a/rust/hg-core/src/requirements.rs	Wed May 26 11:53:37 2021 +0200
+++ b/rust/hg-core/src/requirements.rs	Tue May 25 09:20:30 2021 +0200
@@ -82,6 +82,7 @@
     SPARSEREVLOG_REQUIREMENT,
     RELATIVE_SHARED_REQUIREMENT,
     REVLOG_COMPRESSION_ZSTD,
+    DIRSTATE_V2_REQUIREMENT,
     // As of this writing everything rhg does is read-only.
     // When it starts writing to the repository, it’ll need to either keep the
     // persistent nodemap up to date or remove this entry:
@@ -90,6 +91,8 @@
 
 // Copied from mercurial/requirements.py:
 
+pub(crate) const DIRSTATE_V2_REQUIREMENT: &str = "exp-dirstate-v2";
+
 /// When narrowing is finalized and no longer subject to format changes,
 /// we should move this to just "narrow" or similar.
 #[allow(unused)]
--- a/rust/rhg/src/commands/status.rs	Wed May 26 11:53:37 2021 +0200
+++ b/rust/rhg/src/commands/status.rs	Tue May 25 09:20:30 2021 +0200
@@ -9,6 +9,7 @@
 use crate::ui::Ui;
 use clap::{Arg, SubCommand};
 use hg;
+use hg::dirstate_tree::dirstate_map::DirstateMap;
 use hg::errors::HgResultExt;
 use hg::errors::IoResultExt;
 use hg::matchers::AlwaysMatcher;
@@ -16,7 +17,7 @@
 use hg::repo::Repo;
 use hg::revlog::node::Node;
 use hg::utils::hg_path::{hg_path_to_os_string, HgPath};
-use hg::{DirstateMap, StatusError};
+use hg::StatusError;
 use hg::{HgPathCow, StatusOptions};
 use log::{info, warn};
 use std::convert::TryInto;
@@ -164,14 +165,17 @@
     };
 
     let repo = invocation.repo?;
-    let mut dmap = DirstateMap::new();
     let dirstate_data =
         repo.hg_vfs().mmap_open("dirstate").io_not_found_as_none()?;
     let dirstate_data = match &dirstate_data {
         Some(mmap) => &**mmap,
         None => b"",
     };
-    let parents = dmap.read(dirstate_data)?;
+    let (mut dmap, parents) = if repo.has_dirstate_v2() {
+        DirstateMap::new_v2(dirstate_data)?
+    } else {
+        DirstateMap::new_v1(dirstate_data)?
+    };
     let options = StatusOptions {
         // TODO should be provided by the dirstate parsing and
         // hence be stored on dmap. Using a value that assumes we aren't
@@ -187,8 +191,8 @@
         collect_traversed_dirs: false,
     };
     let ignore_file = repo.working_directory_vfs().join(".hgignore"); // TODO hardcoded
-    let (mut ds_status, pattern_warnings) = hg::status(
-        &dmap,
+    let (mut ds_status, pattern_warnings) = hg::dirstate_tree::status::status(
+        &mut dmap,
         &AlwaysMatcher,
         repo.working_directory_path().to_owned(),
         vec![ignore_file],