rust/hg-core/src/dirstate_tree/on_disk.rs
author Simon Sapin <simon.sapin@octobus.net>
Thu, 15 Jul 2021 08:53:03 +0200
changeset 47679 731286dc5321
parent 47678 065e61628980
child 47680 a8b0f29dc0d7
permissions -rw-r--r--
dirstate-v2: Reuse existing nodes when appending to a data file When writing a dirstate in v2 format by appending to an existing data file, nodes that are still "unparsed" from the previous on-disk representation have been unchanged and can therefore be reused. This makes the new data point to previously-existing data for tree nodes. Differential Revision: https://phab.mercurial-scm.org/D11095

//! The "version 2" disk representation of the dirstate
//!
//! # File format
//!
//! In dirstate-v2 format, the `.hg/dirstate` file is a "docket that starts
//! with a fixed-sized header whose layout is defined by the `DocketHeader`
//! struct, followed by the data file identifier.
//!
//! A separate `.hg/dirstate.{uuid}.d` file contains most of the data. That
//! file may be longer than the size given in the docket, but not shorter. Only
//! the start of the data file up to the given size is considered. The
//! fixed-size "root" of the dirstate tree whose layout is defined by the
//! `Root` struct is found at the end of that slice of data.
//!
//! Its `root_nodes` field contains the slice (offset and length) to
//! the nodes representing the files and directories at the root of the
//! repository. Each node is also fixed-size, defined by the `Node` struct.
//! Nodes in turn contain slices to variable-size paths, and to their own child
//! nodes (if any) for nested files and directories.

use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
use crate::dirstate_tree::path_with_basename::WithBasename;
use crate::errors::HgError;
use crate::utils::hg_path::HgPath;
use crate::DirstateEntry;
use crate::DirstateError;
use crate::DirstateParents;
use crate::EntryState;
use bytes_cast::unaligned::{I32Be, I64Be, U16Be, U32Be};
use bytes_cast::BytesCast;
use format_bytes::format_bytes;
use std::borrow::Cow;
use std::convert::{TryFrom, TryInto};
use std::time::{Duration, SystemTime, UNIX_EPOCH};

/// Added at the start of `.hg/dirstate` when the "v2" format is used.
/// This a redundant sanity check more than an actual "magic number" since
/// `.hg/requires` already governs which format should be used.
pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";

/// Keep space for 256-bit hashes
const STORED_NODE_ID_BYTES: usize = 32;

/// … even though only 160 bits are used for now, with SHA-1
const USED_NODE_ID_BYTES: usize = 20;

pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];

// Must match `HEADER` in `mercurial/dirstateutils/docket.py`
#[derive(BytesCast)]
#[repr(C)]
struct DocketHeader {
    marker: [u8; V2_FORMAT_MARKER.len()],
    parent_1: [u8; STORED_NODE_ID_BYTES],
    parent_2: [u8; STORED_NODE_ID_BYTES],

    /// Counted in bytes
    data_size: Size,

    uuid_size: u8,
}

pub struct Docket<'on_disk> {
    header: &'on_disk DocketHeader,
    uuid: &'on_disk [u8],
}

#[derive(BytesCast)]
#[repr(C)]
struct Root {
    root_nodes: ChildNodes,
    nodes_with_entry_count: Size,
    nodes_with_copy_source_count: Size,

    /// If non-zero, a hash of ignore files that were used for some previous
    /// run of the `status` algorithm.
    ///
    /// We define:
    ///
    /// * "Root" ignore files are `.hgignore` at the root of the repository if
    ///   it exists, and files from `ui.ignore.*` config. This set of files is
    ///   then sorted by the string representation of their path.
    /// * The "expanded contents" of an ignore files is the byte string made
    ///   by concatenating its contents with the "expanded contents" of other
    ///   files included with `include:` or `subinclude:` files, in inclusion
    ///   order. This definition is recursive, as included files can
    ///   themselves include more files.
    ///
    /// This hash is defined as the SHA-1 of the concatenation (in sorted
    /// order) of the "expanded contents" of each "root" ignore file.
    /// (Note that computing this does not require actually concatenating byte
    /// strings into contiguous memory, instead SHA-1 hashing can be done
    /// incrementally.)
    ignore_patterns_hash: IgnorePatternsHash,
}

#[derive(BytesCast)]
#[repr(C)]
pub(super) struct Node {
    full_path: PathSlice,

    /// In bytes from `self.full_path.start`
    base_name_start: PathSize,

    copy_source: OptPathSlice,
    children: ChildNodes,
    pub(super) descendants_with_entry_count: Size,
    pub(super) tracked_descendants_count: Size,

    /// Depending on the value of `state`:
    ///
    /// * A null byte: `data` is not used.
    ///
    /// * A `n`, `a`, `r`, or `m` ASCII byte: `state` and `data` together
    ///   represent a dirstate entry like in the v1 format.
    ///
    /// * A `d` ASCII byte: the bytes of `data` should instead be interpreted
    ///   as the `Timestamp` for the mtime of a cached directory.
    ///
    ///   The presence of this state means that at some point, this path in
    ///   the working directory was observed:
    ///
    ///   - To be a directory
    ///   - With the modification time as given by `Timestamp`
    ///   - That timestamp was already strictly in the past when observed,
    ///     meaning that later changes cannot happen in the same clock tick
    ///     and must cause a different modification time (unless the system
    ///     clock jumps back and we get unlucky, which is not impossible but
    ///     but deemed unlikely enough).
    ///   - All direct children of this directory (as returned by
    ///     `std::fs::read_dir`) either have a corresponding dirstate node, or
    ///     are ignored by ignore patterns whose hash is in
    ///     `Root::ignore_patterns_hash`.
    ///
    ///   This means that if `std::fs::symlink_metadata` later reports the
    ///   same modification time and ignored patterns haven’t changed, a run
    ///   of status that is not listing ignored   files can skip calling
    ///   `std::fs::read_dir` again for this directory,   iterate child
    ///   dirstate nodes instead.
    state: u8,
    data: Entry,
}

#[derive(BytesCast, Copy, Clone)]
#[repr(C)]
struct Entry {
    mode: I32Be,
    mtime: I32Be,
    size: I32Be,
}

/// Duration since the Unix epoch
#[derive(BytesCast, Copy, Clone, PartialEq)]
#[repr(C)]
pub(super) struct Timestamp {
    seconds: I64Be,

    /// In `0 .. 1_000_000_000`.
    ///
    /// This timestamp is later or earlier than `(seconds, 0)` by this many
    /// nanoseconds, if `seconds` is non-negative or negative, respectively.
    nanoseconds: U32Be,
}

/// Counted in bytes from the start of the file
///
/// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
type Offset = U32Be;

/// Counted in number of items
///
/// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
type Size = U32Be;

/// Counted in bytes
///
/// NOTE: we choose not to support file names/paths longer than 64 KiB.
type PathSize = U16Be;

/// A contiguous sequence of `len` times `Node`, representing the child nodes
/// of either some other node or of the repository root.
///
/// Always sorted by ascending `full_path`, to allow binary search.
/// Since nodes with the same parent nodes also have the same parent path,
/// only the `base_name`s need to be compared during binary search.
#[derive(BytesCast, Copy, Clone)]
#[repr(C)]
struct ChildNodes {
    start: Offset,
    len: Size,
}

/// A `HgPath` of `len` bytes
#[derive(BytesCast, Copy, Clone)]
#[repr(C)]
struct PathSlice {
    start: Offset,
    len: PathSize,
}

/// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
type OptPathSlice = PathSlice;

/// Make sure that size-affecting changes are made knowingly
fn _static_assert_size_of() {
    let _ = std::mem::transmute::<DocketHeader, [u8; 81]>;
    let _ = std::mem::transmute::<Root, [u8; 36]>;
    let _ = std::mem::transmute::<Node, [u8; 43]>;
}

/// Unexpected file format found in `.hg/dirstate` with the "v2" format.
///
/// This should only happen if Mercurial is buggy or a repository is corrupted.
#[derive(Debug)]
pub struct DirstateV2ParseError;

impl From<DirstateV2ParseError> for HgError {
    fn from(_: DirstateV2ParseError) -> Self {
        HgError::corrupted("dirstate-v2 parse error")
    }
}

impl From<DirstateV2ParseError> for crate::DirstateError {
    fn from(error: DirstateV2ParseError) -> Self {
        HgError::from(error).into()
    }
}

impl<'on_disk> Docket<'on_disk> {
    pub fn parents(&self) -> DirstateParents {
        use crate::Node;
        let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
            .unwrap()
            .clone();
        let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
            .unwrap()
            .clone();
        DirstateParents { p1, p2 }
    }

    pub fn data_size(&self) -> usize {
        // This `unwrap` could only panic on a 16-bit CPU
        self.header.data_size.get().try_into().unwrap()
    }

    pub fn data_filename(&self) -> String {
        String::from_utf8(format_bytes!(b"dirstate.{}.d", self.uuid)).unwrap()
    }
}

pub fn read_docket(
    on_disk: &[u8],
) -> Result<Docket<'_>, DirstateV2ParseError> {
    let (header, uuid) =
        DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
    let uuid_size = header.uuid_size as usize;
    if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
        Ok(Docket { header, uuid })
    } else {
        Err(DirstateV2ParseError)
    }
}

fn read_root<'on_disk>(
    on_disk: &'on_disk [u8],
) -> Result<&'on_disk Root, DirstateV2ParseError> {
    // Find the `Root` at the end of the given slice
    let root_offset = on_disk
        .len()
        .checked_sub(std::mem::size_of::<Root>())
        // A non-empty slice too short is an error
        .ok_or(DirstateV2ParseError)?;
    let (root, _) = Root::from_bytes(&on_disk[root_offset..])
        .map_err(|_| DirstateV2ParseError)?;
    Ok(root)
}

pub(super) fn read<'on_disk>(
    on_disk: &'on_disk [u8],
) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
    if on_disk.is_empty() {
        return Ok(DirstateMap::empty(on_disk));
    }
    let root = read_root(on_disk)?;
    let dirstate_map = DirstateMap {
        on_disk,
        root: dirstate_map::ChildNodes::OnDisk(read_nodes(
            on_disk,
            root.root_nodes,
        )?),
        nodes_with_entry_count: root.nodes_with_entry_count.get(),
        nodes_with_copy_source_count: root.nodes_with_copy_source_count.get(),
        ignore_patterns_hash: root.ignore_patterns_hash,
    };
    Ok(dirstate_map)
}

impl Node {
    pub(super) fn full_path<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
        read_hg_path(on_disk, self.full_path)
    }

    pub(super) fn base_name_start<'on_disk>(
        &self,
    ) -> Result<usize, DirstateV2ParseError> {
        let start = self.base_name_start.get();
        if start < self.full_path.len.get() {
            let start = usize::try_from(start)
                // u32 -> usize, could only panic on a 16-bit CPU
                .expect("dirstate-v2 base_name_start out of bounds");
            Ok(start)
        } else {
            Err(DirstateV2ParseError)
        }
    }

    pub(super) fn base_name<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
        let full_path = self.full_path(on_disk)?;
        let base_name_start = self.base_name_start()?;
        Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
    }

    pub(super) fn path<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
        Ok(WithBasename::from_raw_parts(
            Cow::Borrowed(self.full_path(on_disk)?),
            self.base_name_start()?,
        ))
    }

    pub(super) fn has_copy_source<'on_disk>(&self) -> bool {
        self.copy_source.start.get() != 0
    }

    pub(super) fn copy_source<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
        Ok(if self.has_copy_source() {
            Some(read_hg_path(on_disk, self.copy_source)?)
        } else {
            None
        })
    }

    pub(super) fn node_data(
        &self,
    ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
        let entry = |state| {
            dirstate_map::NodeData::Entry(self.entry_with_given_state(state))
        };

        match self.state {
            b'\0' => Ok(dirstate_map::NodeData::None),
            b'd' => Ok(dirstate_map::NodeData::CachedDirectory {
                mtime: *self.data.as_timestamp(),
            }),
            b'n' => Ok(entry(EntryState::Normal)),
            b'a' => Ok(entry(EntryState::Added)),
            b'r' => Ok(entry(EntryState::Removed)),
            b'm' => Ok(entry(EntryState::Merged)),
            _ => Err(DirstateV2ParseError),
        }
    }

    pub(super) fn cached_directory_mtime(&self) -> Option<&Timestamp> {
        if self.state == b'd' {
            Some(self.data.as_timestamp())
        } else {
            None
        }
    }

    pub(super) fn state(
        &self,
    ) -> Result<Option<EntryState>, DirstateV2ParseError> {
        match self.state {
            b'\0' | b'd' => Ok(None),
            b'n' => Ok(Some(EntryState::Normal)),
            b'a' => Ok(Some(EntryState::Added)),
            b'r' => Ok(Some(EntryState::Removed)),
            b'm' => Ok(Some(EntryState::Merged)),
            _ => Err(DirstateV2ParseError),
        }
    }

    fn entry_with_given_state(&self, state: EntryState) -> DirstateEntry {
        DirstateEntry {
            state,
            mode: self.data.mode.get(),
            mtime: self.data.mtime.get(),
            size: self.data.size.get(),
        }
    }

    pub(super) fn entry(
        &self,
    ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
        Ok(self
            .state()?
            .map(|state| self.entry_with_given_state(state)))
    }

    pub(super) fn children<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
        read_nodes(on_disk, self.children)
    }

    pub(super) fn to_in_memory_node<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
        Ok(dirstate_map::Node {
            children: dirstate_map::ChildNodes::OnDisk(
                self.children(on_disk)?,
            ),
            copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
            data: self.node_data()?,
            descendants_with_entry_count: self
                .descendants_with_entry_count
                .get(),
            tracked_descendants_count: self.tracked_descendants_count.get(),
        })
    }
}

impl Entry {
    fn from_timestamp(timestamp: Timestamp) -> Self {
        // Safety: both types implement the `ByteCast` trait, so we could
        // safely use `as_bytes` and `from_bytes` to do this conversion. Using
        // `transmute` instead makes the compiler check that the two types
        // have the same size, which eliminates the error case of
        // `from_bytes`.
        unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) }
    }

    fn as_timestamp(&self) -> &Timestamp {
        // Safety: same as above in `from_timestamp`
        unsafe { &*(self as *const Entry as *const Timestamp) }
    }
}

impl Timestamp {
    pub fn seconds(&self) -> i64 {
        self.seconds.get()
    }
}

impl From<SystemTime> for Timestamp {
    fn from(system_time: SystemTime) -> Self {
        let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) {
            Ok(duration) => {
                (duration.as_secs() as i64, duration.subsec_nanos())
            }
            Err(error) => {
                let negative = error.duration();
                (-(negative.as_secs() as i64), negative.subsec_nanos())
            }
        };
        Timestamp {
            seconds: secs.into(),
            nanoseconds: nanos.into(),
        }
    }
}

impl From<&'_ Timestamp> for SystemTime {
    fn from(timestamp: &'_ Timestamp) -> Self {
        let secs = timestamp.seconds.get();
        let nanos = timestamp.nanoseconds.get();
        if secs >= 0 {
            UNIX_EPOCH + Duration::new(secs as u64, nanos)
        } else {
            UNIX_EPOCH - Duration::new((-secs) as u64, nanos)
        }
    }
}

fn read_hg_path(
    on_disk: &[u8],
    slice: PathSlice,
) -> Result<&HgPath, DirstateV2ParseError> {
    read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
}

fn read_nodes(
    on_disk: &[u8],
    slice: ChildNodes,
) -> Result<&[Node], DirstateV2ParseError> {
    read_slice(on_disk, slice.start, slice.len.get())
}

fn read_slice<T, Len>(
    on_disk: &[u8],
    start: Offset,
    len: Len,
) -> Result<&[T], DirstateV2ParseError>
where
    T: BytesCast,
    Len: TryInto<usize>,
{
    // Either `usize::MAX` would result in "out of bounds" error since a single
    // `&[u8]` cannot occupy the entire addess space.
    let start = start.get().try_into().unwrap_or(std::usize::MAX);
    let len = len.try_into().unwrap_or(std::usize::MAX);
    on_disk
        .get(start..)
        .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
        .map(|(slice, _rest)| slice)
        .ok_or_else(|| DirstateV2ParseError)
}

pub(crate) fn for_each_tracked_path<'on_disk>(
    on_disk: &'on_disk [u8],
    mut f: impl FnMut(&'on_disk HgPath),
) -> Result<(), DirstateV2ParseError> {
    let root = read_root(on_disk)?;
    fn recur<'on_disk>(
        on_disk: &'on_disk [u8],
        nodes: ChildNodes,
        f: &mut impl FnMut(&'on_disk HgPath),
    ) -> Result<(), DirstateV2ParseError> {
        for node in read_nodes(on_disk, nodes)? {
            if let Some(state) = node.state()? {
                if state.is_tracked() {
                    f(node.full_path(on_disk)?)
                }
            }
            recur(on_disk, node.children, f)?
        }
        Ok(())
    }
    recur(on_disk, root.root_nodes, &mut f)
}

/// Returns new data together with whether that data should be appended to the
/// existing data file whose content is at `dirstate_map.on_disk` (true),
/// instead of written to a new data file (false).
pub(super) fn write(
    dirstate_map: &mut DirstateMap,
    can_append: bool,
) -> Result<(Vec<u8>, bool), DirstateError> {
    let append = can_append && dirstate_map.write_should_append();

    // This ignores the space for paths, and for nodes without an entry.
    // TODO: better estimate? Skip the `Vec` and write to a file directly?
    let size_guess = std::mem::size_of::<Root>()
        + std::mem::size_of::<Node>()
            * dirstate_map.nodes_with_entry_count as usize;

    let mut writer = Writer {
        dirstate_map,
        append,
        out: Vec::with_capacity(size_guess),
    };

    let root_nodes = writer.write_nodes(dirstate_map.root.as_ref())?;

    let root = Root {
        root_nodes,
        nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
        nodes_with_copy_source_count: dirstate_map
            .nodes_with_copy_source_count
            .into(),
        ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
    };
    writer.out.extend(root.as_bytes());
    Ok((writer.out, append))
}

struct Writer<'dmap, 'on_disk> {
    dirstate_map: &'dmap DirstateMap<'on_disk>,
    append: bool,
    out: Vec<u8>,
}

impl Writer<'_, '_> {
    fn write_nodes(
        &mut self,
        nodes: dirstate_map::ChildNodesRef,
    ) -> Result<ChildNodes, DirstateError> {
        // Reuse already-written nodes if possible
        if self.append {
            if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
                let start = self.offset_of(nodes_slice);
                let len = child_nodes_len_from_usize(nodes_slice.len());
                return Ok(ChildNodes { start, len });
            }
        }

        // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
        // undefined iteration order. Sort to enable binary search in the
        // written file.
        let nodes = nodes.sorted();
        let nodes_len = nodes.len();

        // First accumulate serialized nodes in a `Vec`
        let mut on_disk_nodes = Vec::with_capacity(nodes_len);
        for node in nodes {
            let children =
                self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
            let full_path = node.full_path(self.dirstate_map.on_disk)?;
            let full_path = self.write_path(full_path.as_bytes());
            let copy_source = if let Some(source) =
                node.copy_source(self.dirstate_map.on_disk)?
            {
                self.write_path(source.as_bytes())
            } else {
                PathSlice {
                    start: 0.into(),
                    len: 0.into(),
                }
            };
            on_disk_nodes.push(match node {
                NodeRef::InMemory(path, node) => {
                    let (state, data) = match &node.data {
                        dirstate_map::NodeData::Entry(entry) => (
                            entry.state.into(),
                            Entry {
                                mode: entry.mode.into(),
                                mtime: entry.mtime.into(),
                                size: entry.size.into(),
                            },
                        ),
                        dirstate_map::NodeData::CachedDirectory { mtime } => {
                            (b'd', Entry::from_timestamp(*mtime))
                        }
                        dirstate_map::NodeData::None => (
                            b'\0',
                            Entry {
                                mode: 0.into(),
                                mtime: 0.into(),
                                size: 0.into(),
                            },
                        ),
                    };
                    Node {
                        children,
                        copy_source,
                        full_path,
                        base_name_start: u16::try_from(path.base_name_start())
                            // Could only panic for paths over 64 KiB
                            .expect("dirstate-v2 path length overflow")
                            .into(),
                        descendants_with_entry_count: node
                            .descendants_with_entry_count
                            .into(),
                        tracked_descendants_count: node
                            .tracked_descendants_count
                            .into(),
                        state,
                        data,
                    }
                }
                NodeRef::OnDisk(node) => Node {
                    children,
                    copy_source,
                    full_path,
                    ..*node
                },
            })
        }
        // … so we can write them contiguously, after writing everything else
        // they refer to.
        let start = self.current_offset();
        let len = child_nodes_len_from_usize(nodes_len);
        self.out.extend(on_disk_nodes.as_bytes());
        Ok(ChildNodes { start, len })
    }

    /// Takes a slice of items within `on_disk` and returns its offset for the
    /// start of `on_disk`.
    ///
    /// Panics if the given slice is not within `on_disk`.
    fn offset_of<T>(&self, slice: &[T]) -> Offset
    where
        T: BytesCast,
    {
        fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
            let start = slice.as_ptr() as usize;
            let end = start + slice.len();
            start..=end
        }
        let slice_addresses = address_range(slice.as_bytes());
        let on_disk_addresses = address_range(self.dirstate_map.on_disk);
        assert!(on_disk_addresses.contains(slice_addresses.start()));
        assert!(on_disk_addresses.contains(slice_addresses.end()));
        let offset = slice_addresses.start() - on_disk_addresses.start();
        offset_from_usize(offset)
    }

    fn current_offset(&mut self) -> Offset {
        let mut offset = self.out.len();
        if self.append {
            offset += self.dirstate_map.on_disk.len()
        }
        offset_from_usize(offset)
    }

    fn write_path(&mut self, slice: &[u8]) -> PathSlice {
        let start = self.current_offset();
        let len = path_len_from_usize(slice.len());
        self.out.extend(slice.as_bytes());
        PathSlice { start, len }
    }
}

fn offset_from_usize(x: usize) -> Offset {
    u32::try_from(x)
        // Could only panic for a dirstate file larger than 4 GiB
        .expect("dirstate-v2 offset overflow")
        .into()
}

fn child_nodes_len_from_usize(x: usize) -> Size {
    u32::try_from(x)
        // Could only panic with over 4 billion nodes
        .expect("dirstate-v2 slice length overflow")
        .into()
}

fn path_len_from_usize(x: usize) -> PathSize {
    u16::try_from(x)
        // Could only panic for paths over 64 KiB
        .expect("dirstate-v2 path length overflow")
        .into()
}