rust/hg-core/src/dirstate_tree/on_disk.rs
author Simon Sapin <simon.sapin@octobus.net>
Thu, 15 Jul 2021 23:02:17 +0200
changeset 47682 78f7f0d490ee
parent 47681 d94118365ec5
child 47684 852262e2e7d9
permissions -rw-r--r--
dirstate-v2: Move fixed-size tree metadata into the docket file

Before this changeset, the dirstate-v2 data file contained not only nodes
and paths that may be reused when appending to an existing file, but also
some fixed-size metadata that applies to the entire tree and was added at
the end of the data file for every append.

This moves that metadata into the docket file, so that repeated "append"
operations without meaningful changes don’t actually need to grow any file.

Differential Revision: https://phab.mercurial-scm.org/D11098

//! The "version 2" disk representation of the dirstate
//!
//! # File format
//!
//! In dirstate-v2 format, the `.hg/dirstate` file is a "docket" file that
//! starts with a fixed-size header whose layout is defined by the
//! `DocketHeader` struct, followed by the data file identifier.
//!
//! A separate `.hg/dirstate.{uuid}.d` file contains most of the data. That
//! file may be longer than the size given in the docket, but not shorter. Only
//! the start of the data file up to the given size is considered. The
//! fixed-size metadata for the entire tree, whose layout is defined by the
//! `TreeMetadata` struct, is stored in the docket file itself rather than
//! in the data file.
//!
//! The metadata’s `root_nodes` field contains the slice (offset and length)
//! to the nodes representing the files and directories at the root of the
//! repository. Each node is also fixed-size, defined by the `Node` struct.
//! Nodes in turn contain slices to variable-size paths, and to their own
//! child nodes (if any) for nested files and directories.
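//!
//! For example, loading the tree might look like this sketch (error
//! handling and the exact file I/O are the caller’s responsibility, and
//! `read` is crate-private):
//!
//! ```ignore
//! let docket_bytes = std::fs::read(".hg/dirstate")?;
//! let docket = read_docket(&docket_bytes)?;
//! // The data file may be longer than the size in the docket; only the
//! // start of it is considered.
//! let data = std::fs::read(format!(".hg/{}", docket.data_filename()))?;
//! let map = read(&data[..docket.data_size()], docket.tree_metadata())?;
//! ```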

use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
use crate::dirstate_tree::path_with_basename::WithBasename;
use crate::errors::HgError;
use crate::utils::hg_path::HgPath;
use crate::DirstateEntry;
use crate::DirstateError;
use crate::DirstateParents;
use crate::EntryState;
use bytes_cast::unaligned::{I32Be, I64Be, U16Be, U32Be};
use bytes_cast::BytesCast;
use format_bytes::format_bytes;
use std::borrow::Cow;
use std::convert::{TryFrom, TryInto};
use std::time::{Duration, SystemTime, UNIX_EPOCH};

/// Added at the start of `.hg/dirstate` when the "v2" format is used.
/// This is a redundant sanity check more than an actual "magic number", since
/// `.hg/requires` already governs which format should be used.
pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";

/// Keep space for 256-bit hashes
const STORED_NODE_ID_BYTES: usize = 32;

/// … even though only 160 bits are used for now, with SHA-1
const USED_NODE_ID_BYTES: usize = 20;

pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];

/// Must match the constant of the same name in
/// `mercurial/dirstateutils/docket.py`
const TREE_METADATA_SIZE: usize = 40;

/// Make sure that size-affecting changes are made knowingly
#[allow(unused)]
fn static_assert_size_of() {
    let _ = std::mem::transmute::<DocketHeader, [u8; 121]>;
    let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
    let _ = std::mem::transmute::<Node, [u8; 43]>;
}

// Must match `HEADER` in `mercurial/dirstateutils/docket.py`
#[derive(BytesCast)]
#[repr(C)]
struct DocketHeader {
    marker: [u8; V2_FORMAT_MARKER.len()],
    parent_1: [u8; STORED_NODE_ID_BYTES],
    parent_2: [u8; STORED_NODE_ID_BYTES],

    /// Counted in bytes
    data_size: Size,

    metadata: TreeMetadata,

    uuid_size: u8,
}

pub struct Docket<'on_disk> {
    header: &'on_disk DocketHeader,
    uuid: &'on_disk [u8],
}

#[derive(BytesCast)]
#[repr(C)]
struct TreeMetadata {
    root_nodes: ChildNodes,
    nodes_with_entry_count: Size,
    nodes_with_copy_source_count: Size,

    /// How many bytes of this data file are not used anymore
    unreachable_bytes: Size,

    /// If non-zero, a hash of ignore files that were used for some previous
    /// run of the `status` algorithm.
    ///
    /// We define:
    ///
    /// * "Root" ignore files are `.hgignore` at the root of the repository if
    ///   it exists, and files from `ui.ignore.*` config. This set of files is
    ///   then sorted by the string representation of their path.
    /// * The "expanded contents" of an ignore files is the byte string made
    ///   by concatenating its contents with the "expanded contents" of other
    ///   files included with `include:` or `subinclude:` files, in inclusion
    ///   order. This definition is recursive, as included files can
    ///   themselves include more files.
    ///
    /// This hash is defined as the SHA-1 of the concatenation (in sorted
    /// order) of the "expanded contents" of each "root" ignore file.
    /// (Note that computing this does not require actually concatenating byte
    /// strings into contiguous memory, instead SHA-1 hashing can be done
    /// incrementally.)
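    ///
    /// A minimal sketch of this scheme, assuming the `sha1` crate’s
    /// streaming API and a hypothetical `expanded_contents` helper:
    ///
    /// ```ignore
    /// use sha1::{Digest, Sha1};
    ///
    /// fn ignore_patterns_hash(
    ///     sorted_root_files: &[std::path::PathBuf],
    /// ) -> [u8; 20] {
    ///     let mut hasher = Sha1::new();
    ///     for file in sorted_root_files {
    ///         // `expanded_contents` (hypothetical) recursively inlines
    ///         // `include:` and `subinclude:` directives, in inclusion
    ///         // order.
    ///         hasher.update(expanded_contents(file));
    ///     }
    ///     hasher.finalize().into()
    /// }
    /// ```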
    ignore_patterns_hash: IgnorePatternsHash,
}

#[derive(BytesCast)]
#[repr(C)]
pub(super) struct Node {
    full_path: PathSlice,

    /// In bytes from `self.full_path.start`
    base_name_start: PathSize,

    copy_source: OptPathSlice,
    children: ChildNodes,
    pub(super) descendants_with_entry_count: Size,
    pub(super) tracked_descendants_count: Size,

    /// Depending on the value of `state`:
    ///
    /// * A null byte: `data` is not used.
    ///
    /// * A `n`, `a`, `r`, or `m` ASCII byte: `state` and `data` together
    ///   represent a dirstate entry like in the v1 format.
    ///
    /// * A `d` ASCII byte: the bytes of `data` should instead be interpreted
    ///   as the `Timestamp` for the mtime of a cached directory.
    ///
    ///   The presence of this state means that at some point, this path in
    ///   the working directory was observed:
    ///
    ///   - To be a directory
    ///   - With the modification time as given by `Timestamp`
    ///   - That timestamp was already strictly in the past when observed,
    ///     meaning that later changes cannot happen in the same clock tick
    ///     and must cause a different modification time (unless the system
    ///     clock jumps back and we get unlucky, which is not impossible
    ///     but deemed unlikely enough).
    ///   - All direct children of this directory (as returned by
    ///     `std::fs::read_dir`) either have a corresponding dirstate node, or
    ///     are ignored by ignore patterns whose hash is in
    ///     `TreeMetadata::ignore_patterns_hash`.
    ///
    ///   This means that if `std::fs::symlink_metadata` later reports the
    ///   same modification time and ignored patterns haven’t changed, a run
    ///   of status that is not listing ignored files can skip calling
    ///   `std::fs::read_dir` again for this directory, and iterate child
    ///   dirstate nodes instead.
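    ///
    ///   A sketch of that fast path (hypothetical surrounding names; the
    ///   real check lives in the `status` implementation):
    ///
    ///   ```ignore
    ///   let metadata = std::fs::symlink_metadata(&dir_fs_path)?;
    ///   let cached = node.cached_directory_mtime();
    ///   if cached == Some(&Timestamp::from(metadata.modified()?))
    ///       && ignore_patterns_hash_is_unchanged
    ///   {
    ///       // Skip `read_dir` and iterate child dirstate nodes instead.
    ///   }
    ///   ```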
    state: u8,
    data: Entry,
}

#[derive(BytesCast, Copy, Clone)]
#[repr(C)]
struct Entry {
    mode: I32Be,
    mtime: I32Be,
    size: I32Be,
}

/// Duration since the Unix epoch
#[derive(BytesCast, Copy, Clone, PartialEq)]
#[repr(C)]
pub(super) struct Timestamp {
    seconds: I64Be,

    /// In `0 .. 1_000_000_000`.
    ///
    /// This timestamp is later or earlier than `(seconds, 0)` by this many
    /// nanoseconds, if `seconds` is non-negative or negative, respectively.
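    ///
    /// For example, `seconds = -1` with `nanoseconds = 500_000_000`
    /// represents 1.5 seconds before the epoch.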
    nanoseconds: U32Be,
}

/// Counted in bytes from the start of the data file
///
/// NOTE: dirstate data files larger than 4 GiB are not supported.
type Offset = U32Be;

/// Counted in number of items
///
/// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
type Size = U32Be;

/// Counted in bytes
///
/// NOTE: we choose not to support file names/paths longer than 64 KiB.
type PathSize = U16Be;

/// A contiguous sequence of `len` times `Node`, representing the child nodes
/// of either some other node or of the repository root.
///
/// Always sorted by ascending `full_path`, to allow binary search.
/// Since nodes with the same parent also have the same parent path,
/// only the `base_name`s need to be compared during binary search.
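///
/// A sketch of such a lookup (hypothetical helper; the actual search is
/// done in `dirstate_map`):
///
/// ```ignore
/// fn find_child<'on_disk>(
///     on_disk: &'on_disk [u8],
///     children: &'on_disk [Node],
///     base_name: &HgPath,
/// ) -> Option<&'on_disk Node> {
///     let index = children
///         .binary_search_by(|node| {
///             node.base_name(on_disk)
///                 .expect("corrupted dirstate-v2")
///                 .as_bytes()
///                 .cmp(base_name.as_bytes())
///         })
///         .ok()?;
///     Some(&children[index])
/// }
/// ```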
#[derive(BytesCast, Copy, Clone)]
#[repr(C)]
struct ChildNodes {
    start: Offset,
    len: Size,
}

/// A `HgPath` of `len` bytes
#[derive(BytesCast, Copy, Clone)]
#[repr(C)]
struct PathSlice {
    start: Offset,
    len: PathSize,
}

/// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
type OptPathSlice = PathSlice;

/// Unexpected file format found in `.hg/dirstate` with the "v2" format.
///
/// This should only happen if Mercurial is buggy or a repository is corrupted.
#[derive(Debug)]
pub struct DirstateV2ParseError;

impl From<DirstateV2ParseError> for HgError {
    fn from(_: DirstateV2ParseError) -> Self {
        HgError::corrupted("dirstate-v2 parse error")
    }
}

impl From<DirstateV2ParseError> for crate::DirstateError {
    fn from(error: DirstateV2ParseError) -> Self {
        HgError::from(error).into()
    }
}

impl<'on_disk> Docket<'on_disk> {
    pub fn parents(&self) -> DirstateParents {
        use crate::Node;
        let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
            .unwrap()
            .clone();
        let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
            .unwrap()
            .clone();
        DirstateParents { p1, p2 }
    }

    pub fn tree_metadata(&self) -> &[u8] {
        self.header.metadata.as_bytes()
    }

    pub fn data_size(&self) -> usize {
        // This `unwrap` could only panic on a 16-bit CPU
        self.header.data_size.get().try_into().unwrap()
    }

    pub fn data_filename(&self) -> String {
        String::from_utf8(format_bytes!(b"dirstate.{}.d", self.uuid)).unwrap()
    }
}

pub fn read_docket(
    on_disk: &[u8],
) -> Result<Docket<'_>, DirstateV2ParseError> {
    let (header, uuid) =
        DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
    let uuid_size = header.uuid_size as usize;
    if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
        Ok(Docket { header, uuid })
    } else {
        Err(DirstateV2ParseError)
    }
}

pub(super) fn read<'on_disk>(
    on_disk: &'on_disk [u8],
    metadata: &[u8],
) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
    if on_disk.is_empty() {
        return Ok(DirstateMap::empty(on_disk));
    }
    let (meta, _) = TreeMetadata::from_bytes(metadata)
        .map_err(|_| DirstateV2ParseError)?;
    let dirstate_map = DirstateMap {
        on_disk,
        root: dirstate_map::ChildNodes::OnDisk(read_nodes(
            on_disk,
            meta.root_nodes,
        )?),
        nodes_with_entry_count: meta.nodes_with_entry_count.get(),
        nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
        ignore_patterns_hash: meta.ignore_patterns_hash,
        unreachable_bytes: meta.unreachable_bytes.get(),
    };
    Ok(dirstate_map)
}

impl Node {
    pub(super) fn full_path<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
        read_hg_path(on_disk, self.full_path)
    }

    pub(super) fn base_name_start<'on_disk>(
        &self,
    ) -> Result<usize, DirstateV2ParseError> {
        let start = self.base_name_start.get();
        if start < self.full_path.len.get() {
            let start = usize::try_from(start)
                // u16 -> usize conversion never fails
                .expect("dirstate-v2 base_name_start out of bounds");
            Ok(start)
        } else {
            Err(DirstateV2ParseError)
        }
    }

    pub(super) fn base_name<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
        let full_path = self.full_path(on_disk)?;
        let base_name_start = self.base_name_start()?;
        Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
    }

    pub(super) fn path<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
        Ok(WithBasename::from_raw_parts(
            Cow::Borrowed(self.full_path(on_disk)?),
            self.base_name_start()?,
        ))
    }

    pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
        self.copy_source.start.get() != 0
    }

    pub(super) fn copy_source<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
        Ok(if self.has_copy_source() {
            Some(read_hg_path(on_disk, self.copy_source)?)
        } else {
            None
        })
    }

    pub(super) fn node_data(
        &self,
    ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
        let entry = |state| {
            dirstate_map::NodeData::Entry(self.entry_with_given_state(state))
        };

        match self.state {
            b'\0' => Ok(dirstate_map::NodeData::None),
            b'd' => Ok(dirstate_map::NodeData::CachedDirectory {
                mtime: *self.data.as_timestamp(),
            }),
            b'n' => Ok(entry(EntryState::Normal)),
            b'a' => Ok(entry(EntryState::Added)),
            b'r' => Ok(entry(EntryState::Removed)),
            b'm' => Ok(entry(EntryState::Merged)),
            _ => Err(DirstateV2ParseError),
        }
    }

    pub(super) fn cached_directory_mtime(&self) -> Option<&Timestamp> {
        if self.state == b'd' {
            Some(self.data.as_timestamp())
        } else {
            None
        }
    }

    pub(super) fn state(
        &self,
    ) -> Result<Option<EntryState>, DirstateV2ParseError> {
        match self.state {
            b'\0' | b'd' => Ok(None),
            b'n' => Ok(Some(EntryState::Normal)),
            b'a' => Ok(Some(EntryState::Added)),
            b'r' => Ok(Some(EntryState::Removed)),
            b'm' => Ok(Some(EntryState::Merged)),
            _ => Err(DirstateV2ParseError),
        }
    }

    fn entry_with_given_state(&self, state: EntryState) -> DirstateEntry {
        DirstateEntry {
            state,
            mode: self.data.mode.get(),
            mtime: self.data.mtime.get(),
            size: self.data.size.get(),
        }
    }

    pub(super) fn entry(
        &self,
    ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
        Ok(self
            .state()?
            .map(|state| self.entry_with_given_state(state)))
    }

    pub(super) fn children<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
        read_nodes(on_disk, self.children)
    }

    pub(super) fn to_in_memory_node<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
        Ok(dirstate_map::Node {
            children: dirstate_map::ChildNodes::OnDisk(
                self.children(on_disk)?,
            ),
            copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
            data: self.node_data()?,
            descendants_with_entry_count: self
                .descendants_with_entry_count
                .get(),
            tracked_descendants_count: self.tracked_descendants_count.get(),
        })
    }
}

impl Entry {
    fn from_timestamp(timestamp: Timestamp) -> Self {
        // Safety: both types implement the `BytesCast` trait, so we could
        // safely use `as_bytes` and `from_bytes` to do this conversion. Using
        // `transmute` instead makes the compiler check that the two types
        // have the same size, which eliminates the error case of
        // `from_bytes`.
        unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) }
    }

    fn as_timestamp(&self) -> &Timestamp {
        // Safety: same as above in `from_timestamp`
        unsafe { &*(self as *const Entry as *const Timestamp) }
    }
}

impl Timestamp {
    pub fn seconds(&self) -> i64 {
        self.seconds.get()
    }
}

impl From<SystemTime> for Timestamp {
    fn from(system_time: SystemTime) -> Self {
        let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) {
            Ok(duration) => {
                (duration.as_secs() as i64, duration.subsec_nanos())
            }
            Err(error) => {
                let negative = error.duration();
                (-(negative.as_secs() as i64), negative.subsec_nanos())
            }
        };
        Timestamp {
            seconds: secs.into(),
            nanoseconds: nanos.into(),
        }
    }
}

impl From<&'_ Timestamp> for SystemTime {
    fn from(timestamp: &'_ Timestamp) -> Self {
        let secs = timestamp.seconds.get();
        let nanos = timestamp.nanoseconds.get();
        if secs >= 0 {
            UNIX_EPOCH + Duration::new(secs as u64, nanos)
        } else {
            UNIX_EPOCH - Duration::new((-secs) as u64, nanos)
        }
    }
}

fn read_hg_path(
    on_disk: &[u8],
    slice: PathSlice,
) -> Result<&HgPath, DirstateV2ParseError> {
    read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
}

fn read_nodes(
    on_disk: &[u8],
    slice: ChildNodes,
) -> Result<&[Node], DirstateV2ParseError> {
    read_slice(on_disk, slice.start, slice.len.get())
}

fn read_slice<T, Len>(
    on_disk: &[u8],
    start: Offset,
    len: Len,
) -> Result<&[T], DirstateV2ParseError>
where
    T: BytesCast,
    Len: TryInto<usize>,
{
    // Either `usize::MAX` fallback would result in an "out of bounds" error,
    // since a single `&[u8]` cannot occupy the entire address space.
    let start = start.get().try_into().unwrap_or(std::usize::MAX);
    let len = len.try_into().unwrap_or(std::usize::MAX);
    on_disk
        .get(start..)
        .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
        .map(|(slice, _rest)| slice)
        .ok_or_else(|| DirstateV2ParseError)
}

pub(crate) fn for_each_tracked_path<'on_disk>(
    on_disk: &'on_disk [u8],
    metadata: &[u8],
    mut f: impl FnMut(&'on_disk HgPath),
) -> Result<(), DirstateV2ParseError> {
    let (meta, _) = TreeMetadata::from_bytes(metadata)
        .map_err(|_| DirstateV2ParseError)?;
    fn recur<'on_disk>(
        on_disk: &'on_disk [u8],
        nodes: ChildNodes,
        f: &mut impl FnMut(&'on_disk HgPath),
    ) -> Result<(), DirstateV2ParseError> {
        for node in read_nodes(on_disk, nodes)? {
            if let Some(state) = node.state()? {
                if state.is_tracked() {
                    f(node.full_path(on_disk)?)
                }
            }
            recur(on_disk, node.children, f)?
        }
        Ok(())
    }
    recur(on_disk, meta.root_nodes, &mut f)
}

/// Returns new data and metadata, together with whether that data should be
/// appended to the existing data file whose content is at
/// `dirstate_map.on_disk` (true), instead of written to a new data file
/// (false).
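///
/// A sketch of how a caller might use the result (the docket bookkeeping
/// itself happens in this function’s callers):
///
/// ```ignore
/// let (data, tree_metadata, append) = write(&mut dirstate_map, can_append)?;
/// if append {
///     // Append `data` to the existing `dirstate.{uuid}.d` file …
/// } else {
///     // … or write it to a fresh `dirstate.{new_uuid}.d` file.
/// }
/// // Either way, record `tree_metadata`, the new data size, and the uuid
/// // in the `.hg/dirstate` docket.
/// ```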
pub(super) fn write(
    dirstate_map: &mut DirstateMap,
    can_append: bool,
) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
    let append = can_append && dirstate_map.write_should_append();

    // This ignores the space for paths, and for nodes without an entry.
    // TODO: better estimate? Skip the `Vec` and write to a file directly?
    let size_guess = std::mem::size_of::<Node>()
        * dirstate_map.nodes_with_entry_count as usize;

    let mut writer = Writer {
        dirstate_map,
        append,
        out: Vec::with_capacity(size_guess),
    };

    let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;

    let meta = TreeMetadata {
        root_nodes,
        nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
        nodes_with_copy_source_count: dirstate_map
            .nodes_with_copy_source_count
            .into(),
        unreachable_bytes: dirstate_map.unreachable_bytes.into(),
        ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
    };
    Ok((writer.out, meta.as_bytes().to_vec(), append))
}

struct Writer<'dmap, 'on_disk> {
    dirstate_map: &'dmap DirstateMap<'on_disk>,
    append: bool,
    out: Vec<u8>,
}

impl Writer<'_, '_> {
    fn write_nodes(
        &mut self,
        nodes: dirstate_map::ChildNodesRef,
    ) -> Result<ChildNodes, DirstateError> {
        // Reuse already-written nodes if possible
        if self.append {
            if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
                let start = self.on_disk_offset_of(nodes_slice).expect(
                    "dirstate-v2 OnDisk nodes not found within on_disk",
                );
                let len = child_nodes_len_from_usize(nodes_slice.len());
                return Ok(ChildNodes { start, len });
            }
        }

        // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
        // undefined iteration order. Sort to enable binary search in the
        // written file.
        let nodes = nodes.sorted();
        let nodes_len = nodes.len();

        // First accumulate serialized nodes in a `Vec`
        let mut on_disk_nodes = Vec::with_capacity(nodes_len);
        for node in nodes {
            let children =
                self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
            let full_path = node.full_path(self.dirstate_map.on_disk)?;
            let full_path = self.write_path(full_path.as_bytes());
            let copy_source = if let Some(source) =
                node.copy_source(self.dirstate_map.on_disk)?
            {
                self.write_path(source.as_bytes())
            } else {
                PathSlice {
                    start: 0.into(),
                    len: 0.into(),
                }
            };
            on_disk_nodes.push(match node {
                NodeRef::InMemory(path, node) => {
                    let (state, data) = match &node.data {
                        dirstate_map::NodeData::Entry(entry) => (
                            entry.state.into(),
                            Entry {
                                mode: entry.mode.into(),
                                mtime: entry.mtime.into(),
                                size: entry.size.into(),
                            },
                        ),
                        dirstate_map::NodeData::CachedDirectory { mtime } => {
                            (b'd', Entry::from_timestamp(*mtime))
                        }
                        dirstate_map::NodeData::None => (
                            b'\0',
                            Entry {
                                mode: 0.into(),
                                mtime: 0.into(),
                                size: 0.into(),
                            },
                        ),
                    };
                    Node {
                        children,
                        copy_source,
                        full_path,
                        base_name_start: u16::try_from(path.base_name_start())
                            // Could only panic for paths over 64 KiB
                            .expect("dirstate-v2 path length overflow")
                            .into(),
                        descendants_with_entry_count: node
                            .descendants_with_entry_count
                            .into(),
                        tracked_descendants_count: node
                            .tracked_descendants_count
                            .into(),
                        state,
                        data,
                    }
                }
                NodeRef::OnDisk(node) => Node {
                    children,
                    copy_source,
                    full_path,
                    ..*node
                },
            })
        }
        // … so we can write them contiguously, after writing everything else
        // they refer to.
        let start = self.current_offset();
        let len = child_nodes_len_from_usize(nodes_len);
        self.out.extend(on_disk_nodes.as_bytes());
        Ok(ChildNodes { start, len })
    }

    /// If the given slice of items is within `on_disk`, returns its offset
    /// from the start of `on_disk`.
    fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
    where
        T: BytesCast,
    {
        fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
            let start = slice.as_ptr() as usize;
            let end = start + slice.len();
            start..=end
        }
        let slice_addresses = address_range(slice.as_bytes());
        let on_disk_addresses = address_range(self.dirstate_map.on_disk);
        if on_disk_addresses.contains(slice_addresses.start())
            && on_disk_addresses.contains(slice_addresses.end())
        {
            let offset = slice_addresses.start() - on_disk_addresses.start();
            Some(offset_from_usize(offset))
        } else {
            None
        }
    }

    fn current_offset(&mut self) -> Offset {
        let mut offset = self.out.len();
        if self.append {
            offset += self.dirstate_map.on_disk.len()
        }
        offset_from_usize(offset)
    }

    fn write_path(&mut self, slice: &[u8]) -> PathSlice {
        let len = path_len_from_usize(slice.len());
        // Reuse an already-written path if possible
        if self.append {
            if let Some(start) = self.on_disk_offset_of(slice) {
                return PathSlice { start, len };
            }
        }
        let start = self.current_offset();
        self.out.extend(slice.as_bytes());
        PathSlice { start, len }
    }
}

fn offset_from_usize(x: usize) -> Offset {
    u32::try_from(x)
        // Could only panic for a dirstate file larger than 4 GiB
        .expect("dirstate-v2 offset overflow")
        .into()
}

fn child_nodes_len_from_usize(x: usize) -> Size {
    u32::try_from(x)
        // Could only panic with over 4 billion nodes
        .expect("dirstate-v2 slice length overflow")
        .into()
}

fn path_len_from_usize(x: usize) -> PathSize {
    u16::try_from(x)
        // Could only panic for paths over 64 KiB
        .expect("dirstate-v2 path length overflow")
        .into()
}