rhg: Add Repo::write_dirstate
authorSimon Sapin <simon.sapin@octobus.net>
Thu, 02 Dec 2021 15:10:03 +0100
changeset 48421 2097f63575a5
parent 48420 c7c23bb036c9
child 48422 000130cfafb6
rhg: Add Repo::write_dirstate This method is not used yet. It saves to disk any mutation that was done to the `Repo`’s dirstate through `Repo::dirstate_map_mut`. It takes care of dirstate-v1 v.s. dirstate-v2, dockets, data files, appending when possible, etc. Differential Revision: https://phab.mercurial-scm.org/D11839
rust/hg-core/src/dirstate_tree/dirstate_map.rs
rust/hg-core/src/dirstate_tree/on_disk.rs
rust/hg-core/src/repo.rs
rust/hg-core/src/revlog/node.rs
rust/hg-cpython/src/dirstate/dirstate_map.rs
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Thu Dec 02 12:05:36 2021 +0100
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Thu Dec 02 15:10:03 2021 +0100
@@ -951,7 +951,7 @@
     pub fn pack_v2(
         &self,
         can_append: bool,
-    ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
+    ) -> Result<(Vec<u8>, on_disk::TreeMetadata, bool), DirstateError> {
         let map = self.get_map();
         on_disk::write(map, can_append)
     }
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs	Thu Dec 02 12:05:36 2021 +0100
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs	Thu Dec 02 15:10:03 2021 +0100
@@ -14,8 +14,10 @@
 use bytes_cast::unaligned::{U16Be, U32Be};
 use bytes_cast::BytesCast;
 use format_bytes::format_bytes;
+use rand::Rng;
 use std::borrow::Cow;
 use std::convert::{TryFrom, TryInto};
+use std::fmt::Write;
 
 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
 /// This a redundant sanity check more than an actual "magic number" since
@@ -68,7 +70,7 @@
 /// section of `mercurial/helptext/internals/dirstate-v2.txt`
 #[derive(BytesCast)]
 #[repr(C)]
-struct TreeMetadata {
+pub struct TreeMetadata {
     root_nodes: ChildNodes,
     nodes_with_entry_count: Size,
     nodes_with_copy_source_count: Size,
@@ -186,7 +188,51 @@
     }
 }
 
+impl TreeMetadata {
+    pub fn as_bytes(&self) -> &[u8] {
+        BytesCast::as_bytes(self)
+    }
+}
+
 impl<'on_disk> Docket<'on_disk> {
+    /// Generate the identifier for a new data file
+    ///
+    /// TODO: support the `HGTEST_UUIDFILE` environment variable.
+    /// See `mercurial/revlogutils/docket.py`
+    pub fn new_uid() -> String {
+        const ID_LENGTH: usize = 8;
+        let mut id = String::with_capacity(ID_LENGTH);
+        let mut rng = rand::thread_rng();
+        for _ in 0..ID_LENGTH {
+            // One random hexadecimal digit.
+            // `unwrap` never panics because `impl Write for String`
+            // never returns an error.
+            write!(&mut id, "{:x}", rng.gen_range(0, 16)).unwrap();
+        }
+        id
+    }
+
+    pub fn serialize(
+        parents: DirstateParents,
+        tree_metadata: TreeMetadata,
+        data_size: u64,
+        uuid: &[u8],
+    ) -> Result<Vec<u8>, std::num::TryFromIntError> {
+        let header = DocketHeader {
+            marker: *V2_FORMAT_MARKER,
+            parent_1: parents.p1.pad_to_256_bits(),
+            parent_2: parents.p2.pad_to_256_bits(),
+            metadata: tree_metadata,
+            data_size: u32::try_from(data_size)?.into(),
+            uuid_size: uuid.len().try_into()?,
+        };
+        let header = header.as_bytes();
+        let mut docket = Vec::with_capacity(header.len() + uuid.len());
+        docket.extend_from_slice(header);
+        docket.extend_from_slice(uuid);
+        Ok(docket)
+    }
+
     pub fn parents(&self) -> DirstateParents {
         use crate::Node;
         let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
@@ -555,7 +601,7 @@
 pub(super) fn write(
     dirstate_map: &DirstateMap,
     can_append: bool,
-) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
+) -> Result<(Vec<u8>, TreeMetadata, bool), DirstateError> {
     let append = can_append && dirstate_map.write_should_append();
 
     // This ignores the space for paths, and for nodes without an entry.
@@ -581,7 +627,7 @@
         unused: [0; 4],
         ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
     };
-    Ok((writer.out, meta.as_bytes().to_vec(), append))
+    Ok((writer.out, meta, append))
 }
 
 struct Writer<'dmap, 'on_disk> {
--- a/rust/hg-core/src/repo.rs	Thu Dec 02 12:05:36 2021 +0100
+++ b/rust/hg-core/src/repo.rs	Thu Dec 02 15:10:03 2021 +0100
@@ -2,9 +2,10 @@
 use crate::config::{Config, ConfigError, ConfigParseError};
 use crate::dirstate::DirstateParents;
 use crate::dirstate_tree::dirstate_map::DirstateMap;
+use crate::dirstate_tree::on_disk::Docket as DirstateDocket;
 use crate::dirstate_tree::owning::OwningDirstateMap;
-use crate::errors::HgError;
 use crate::errors::HgResultExt;
+use crate::errors::{HgError, IoResultExt};
 use crate::exit_codes;
 use crate::lock::{try_with_lock_no_wait, LockError};
 use crate::manifest::{Manifest, Manifestlog};
@@ -18,6 +19,9 @@
 use crate::{DirstateError, Revision};
 use std::cell::{Ref, RefCell, RefMut};
 use std::collections::HashSet;
+use std::io::Seek;
+use std::io::SeekFrom;
+use std::io::Write as IoWrite;
 use std::path::{Path, PathBuf};
 
 /// A repository on disk
@@ -416,6 +420,70 @@
     pub fn filelog(&self, path: &HgPath) -> Result<Filelog, HgError> {
         Filelog::open(self, path)
     }
+
+    /// Write to disk any updates that were made through `dirstate_map_mut`.
+    ///
+    /// The "wlock" must be held while calling this.
+    /// See for example `try_with_wlock_no_wait`.
+    ///
+    /// TODO: have a `WritableRepo` type only accessible while holding the
+    /// lock?
+    pub fn write_dirstate(&self) -> Result<(), DirstateError> {
+        let map = self.dirstate_map()?;
+        // TODO: Maintain a `DirstateMap::dirty` flag, and return early here if
+        // it’s unset
+        let parents = self.dirstate_parents()?;
+        let packed_dirstate = if self.has_dirstate_v2() {
+            let uuid = self.dirstate_data_file_uuid.get_or_init(self)?;
+            let mut uuid = uuid.as_ref();
+            let can_append = uuid.is_some();
+            let (data, tree_metadata, append) = map.pack_v2(can_append)?;
+            if !append {
+                uuid = None
+            }
+            let uuid = if let Some(uuid) = uuid {
+                std::str::from_utf8(uuid)
+                    .map_err(|_| {
+                        HgError::corrupted("non-UTF-8 dirstate data file ID")
+                    })?
+                    .to_owned()
+            } else {
+                DirstateDocket::new_uid()
+            };
+            let data_filename = format!("dirstate.{}", uuid);
+            let data_filename = self.hg_vfs().join(data_filename);
+            let mut options = std::fs::OpenOptions::new();
+            if append {
+                options.append(true);
+            } else {
+                options.write(true).create_new(true);
+            }
+            let data_size = (|| {
+                // TODO: loop and try another random ID if !append and this
+                // returns `ErrorKind::AlreadyExists`? Collision chance of two
+                // random IDs is one in 2**32
+                let mut file = options.open(&data_filename)?;
+                file.write_all(&data)?;
+                file.flush()?;
+                // TODO: use https://doc.rust-lang.org/std/io/trait.Seek.html#method.stream_position when we require Rust 1.51+
+                file.seek(SeekFrom::Current(0))
+            })()
+            .when_writing_file(&data_filename)?;
+            DirstateDocket::serialize(
+                parents,
+                tree_metadata,
+                data_size,
+                uuid.as_bytes(),
+            )
+            .map_err(|_: std::num::TryFromIntError| {
+                HgError::corrupted("overflow in dirstate docket serialization")
+            })?
+        } else {
+            map.pack_v1(parents)?
+        };
+        self.hg_vfs().atomic_write("dirstate", &packed_dirstate)?;
+        Ok(())
+    }
 }
 
 /// Lazily-initialized component of `Repo` with interior mutability
--- a/rust/hg-core/src/revlog/node.rs	Thu Dec 02 12:05:36 2021 +0100
+++ b/rust/hg-core/src/revlog/node.rs	Thu Dec 02 15:10:03 2021 +0100
@@ -174,6 +174,12 @@
             data: self.data,
         }
     }
+
+    pub fn pad_to_256_bits(&self) -> [u8; 32] {
+        let mut bits = [0; 32];
+        bits[..NODE_BYTES_LENGTH].copy_from_slice(&self.data);
+        bits
+    }
 }
 
 /// The beginning of a binary revision SHA.
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs	Thu Dec 02 12:05:36 2021 +0100
+++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs	Thu Dec 02 15:10:03 2021 +0100
@@ -222,7 +222,7 @@
         match result {
             Ok((packed, tree_metadata, append)) => {
                 let packed = PyBytes::new(py, &packed);
-                let tree_metadata = PyBytes::new(py, &tree_metadata);
+                let tree_metadata = PyBytes::new(py, tree_metadata.as_bytes());
                 let tuple = (packed, tree_metadata, append);
                 Ok(tuple.to_py_object(py).into_object())
             },