rust/hg-core/src/dirstate_tree/on_disk.rs
changeset 47677 da1c0cd68d53
parent 47676 096ee2e260a3
child 47678 065e61628980
equal deleted inserted replaced
47676:096ee2e260a3 47677:da1c0cd68d53
    24 use crate::utils::hg_path::HgPath;
    24 use crate::utils::hg_path::HgPath;
    25 use crate::DirstateEntry;
    25 use crate::DirstateEntry;
    26 use crate::DirstateError;
    26 use crate::DirstateError;
    27 use crate::DirstateParents;
    27 use crate::DirstateParents;
    28 use crate::EntryState;
    28 use crate::EntryState;
    29 use bytes_cast::unaligned::{I32Be, I64Be, U32Be};
    29 use bytes_cast::unaligned::{I32Be, I64Be, U16Be, U32Be};
    30 use bytes_cast::BytesCast;
    30 use bytes_cast::BytesCast;
    31 use format_bytes::format_bytes;
    31 use format_bytes::format_bytes;
    32 use std::borrow::Cow;
    32 use std::borrow::Cow;
    33 use std::convert::{TryFrom, TryInto};
    33 use std::convert::{TryFrom, TryInto};
    34 use std::time::{Duration, SystemTime, UNIX_EPOCH};
    34 use std::time::{Duration, SystemTime, UNIX_EPOCH};
    52 #[repr(C)]
    52 #[repr(C)]
    53 struct DocketHeader {
    53 struct DocketHeader {
    54     marker: [u8; V2_FORMAT_MARKER.len()],
    54     marker: [u8; V2_FORMAT_MARKER.len()],
    55     parent_1: [u8; STORED_NODE_ID_BYTES],
    55     parent_1: [u8; STORED_NODE_ID_BYTES],
    56     parent_2: [u8; STORED_NODE_ID_BYTES],
    56     parent_2: [u8; STORED_NODE_ID_BYTES],
       
    57 
       
    58     /// Counted in bytes
    57     data_size: Size,
    59     data_size: Size,
       
    60 
    58     uuid_size: u8,
    61     uuid_size: u8,
    59 }
    62 }
    60 
    63 
    61 pub struct Docket<'on_disk> {
    64 pub struct Docket<'on_disk> {
    62     header: &'on_disk DocketHeader,
    65     header: &'on_disk DocketHeader,
    96 #[repr(C)]
    99 #[repr(C)]
    97 pub(super) struct Node {
   100 pub(super) struct Node {
    98     full_path: PathSlice,
   101     full_path: PathSlice,
    99 
   102 
   100     /// In bytes from `self.full_path.start`
   103     /// In bytes from `self.full_path.start`
   101     base_name_start: Size,
   104     base_name_start: PathSize,
   102 
   105 
   103     copy_source: OptPathSlice,
   106     copy_source: OptPathSlice,
   104     children: ChildNodes,
   107     children: ChildNodes,
   105     pub(super) descendants_with_entry_count: Size,
   108     pub(super) descendants_with_entry_count: Size,
   106     pub(super) tracked_descendants_count: Size,
   109     pub(super) tracked_descendants_count: Size,
   165 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
   168 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
   166 type Offset = U32Be;
   169 type Offset = U32Be;
   167 
   170 
   168 /// Counted in number of items
   171 /// Counted in number of items
   169 ///
   172 ///
   170 /// NOTE: not supporting directories with more than 4 billion direct children,
   173 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
   171 /// or filenames more than 4 GiB.
       
   172 type Size = U32Be;
   174 type Size = U32Be;
   173 
   175 
   174 /// Location of consecutive, fixed-size items.
   176 /// Counted in bytes
   175 ///
   177 ///
   176 /// An item can be a single byte for paths, or a struct with
   178 /// NOTE: we choose not to support file names/paths longer than 64 KiB.
   177 /// `derive(BytesCast)`.
   179 type PathSize = U16Be;
   178 #[derive(BytesCast, Copy, Clone)]
       
   179 #[repr(C)]
       
   180 struct Slice {
       
   181     start: Offset,
       
   182     len: Size,
       
   183 }
       
   184 
   180 
   185 /// A contiguous sequence of `len` times `Node`, representing the child nodes
   181 /// A contiguous sequence of `len` times `Node`, representing the child nodes
   186 /// of either some other node or of the repository root.
   182 /// of either some other node or of the repository root.
   187 ///
   183 ///
   188 /// Always sorted by ascending `full_path`, to allow binary search.
   184 /// Always sorted by ascending `full_path`, to allow binary search.
   189 /// Since nodes with the same parent nodes also have the same parent path,
   185 /// Since nodes with the same parent nodes also have the same parent path,
   190 /// only the `base_name`s need to be compared during binary search.
   186 /// only the `base_name`s need to be compared during binary search.
   191 type ChildNodes = Slice;
   187 #[derive(BytesCast, Copy, Clone)]
       
   188 #[repr(C)]
       
   189 struct ChildNodes {
       
   190     start: Offset,
       
   191     len: Size,
       
   192 }
   192 
   193 
   193 /// A `HgPath` of `len` bytes
   194 /// A `HgPath` of `len` bytes
   194 type PathSlice = Slice;
   195 #[derive(BytesCast, Copy, Clone)]
       
   196 #[repr(C)]
       
   197 struct PathSlice {
       
   198     start: Offset,
       
   199     len: PathSize,
       
   200 }
   195 
   201 
   196 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
   202 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
   197 type OptPathSlice = Slice;
   203 type OptPathSlice = PathSlice;
   198 
   204 
   199 /// Make sure that size-affecting changes are made knowingly
   205 /// Make sure that size-affecting changes are made knowingly
   200 fn _static_assert_size_of() {
   206 fn _static_assert_size_of() {
   201     let _ = std::mem::transmute::<DocketHeader, [u8; 81]>;
   207     let _ = std::mem::transmute::<DocketHeader, [u8; 81]>;
   202     let _ = std::mem::transmute::<Root, [u8; 36]>;
   208     let _ = std::mem::transmute::<Root, [u8; 36]>;
   203     let _ = std::mem::transmute::<Node, [u8; 49]>;
   209     let _ = std::mem::transmute::<Node, [u8; 43]>;
   204 }
   210 }
   205 
   211 
   206 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
   212 /// Unexpected file format found in `.hg/dirstate` with the "v2" format.
   207 ///
   213 ///
   208 /// This should only happen if Mercurial is buggy or a repository is corrupted.
   214 /// This should only happen if Mercurial is buggy or a repository is corrupted.
   277         return Ok(DirstateMap::empty(on_disk));
   283         return Ok(DirstateMap::empty(on_disk));
   278     }
   284     }
   279     let root = read_root(on_disk)?;
   285     let root = read_root(on_disk)?;
   280     let dirstate_map = DirstateMap {
   286     let dirstate_map = DirstateMap {
   281         on_disk,
   287         on_disk,
   282         root: dirstate_map::ChildNodes::OnDisk(read_slice::<Node>(
   288         root: dirstate_map::ChildNodes::OnDisk(read_nodes(
   283             on_disk,
   289             on_disk,
   284             root.root_nodes,
   290             root.root_nodes,
   285         )?),
   291         )?),
   286         nodes_with_entry_count: root.nodes_with_entry_count.get(),
   292         nodes_with_entry_count: root.nodes_with_entry_count.get(),
   287         nodes_with_copy_source_count: root.nodes_with_copy_source_count.get(),
   293         nodes_with_copy_source_count: root.nodes_with_copy_source_count.get(),
   406 
   412 
   407     pub(super) fn children<'on_disk>(
   413     pub(super) fn children<'on_disk>(
   408         &self,
   414         &self,
   409         on_disk: &'on_disk [u8],
   415         on_disk: &'on_disk [u8],
   410     ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
   416     ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
   411         read_slice::<Node>(on_disk, self.children)
   417         read_nodes(on_disk, self.children)
   412     }
   418     }
   413 
   419 
   414     pub(super) fn to_in_memory_node<'on_disk>(
   420     pub(super) fn to_in_memory_node<'on_disk>(
   415         &self,
   421         &self,
   416         on_disk: &'on_disk [u8],
   422         on_disk: &'on_disk [u8],
   481     }
   487     }
   482 }
   488 }
   483 
   489 
   484 fn read_hg_path(
   490 fn read_hg_path(
   485     on_disk: &[u8],
   491     on_disk: &[u8],
   486     slice: Slice,
   492     slice: PathSlice,
   487 ) -> Result<&HgPath, DirstateV2ParseError> {
   493 ) -> Result<&HgPath, DirstateV2ParseError> {
   488     let bytes = read_slice::<u8>(on_disk, slice)?;
   494     read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
   489     Ok(HgPath::new(bytes))
   495 }
   490 }
   496 
   491 
   497 fn read_nodes(
   492 fn read_slice<T>(
       
   493     on_disk: &[u8],
   498     on_disk: &[u8],
   494     slice: Slice,
   499     slice: ChildNodes,
       
   500 ) -> Result<&[Node], DirstateV2ParseError> {
       
   501     read_slice(on_disk, slice.start, slice.len.get())
       
   502 }
       
   503 
       
   504 fn read_slice<T, Len>(
       
   505     on_disk: &[u8],
       
   506     start: Offset,
       
   507     len: Len,
   495 ) -> Result<&[T], DirstateV2ParseError>
   508 ) -> Result<&[T], DirstateV2ParseError>
   496 where
   509 where
   497     T: BytesCast,
   510     T: BytesCast,
       
   511     Len: TryInto<usize>,
   498 {
   512 {
   499     // Either `usize::MAX` would result in "out of bounds" error since a single
   513     // Either `usize::MAX` would result in "out of bounds" error since a single
   500     // `&[u8]` cannot occupy the entire addess space.
   514     // `&[u8]` cannot occupy the entire addess space.
   501     let start = usize::try_from(slice.start.get()).unwrap_or(std::usize::MAX);
   515     let start = start.get().try_into().unwrap_or(std::usize::MAX);
   502     let len = usize::try_from(slice.len.get()).unwrap_or(std::usize::MAX);
   516     let len = len.try_into().unwrap_or(std::usize::MAX);
   503     on_disk
   517     on_disk
   504         .get(start..)
   518         .get(start..)
   505         .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
   519         .and_then(|bytes| T::slice_from_bytes(bytes, len).ok())
   506         .map(|(slice, _rest)| slice)
   520         .map(|(slice, _rest)| slice)
   507         .ok_or_else(|| DirstateV2ParseError)
   521         .ok_or_else(|| DirstateV2ParseError)
   512     mut f: impl FnMut(&'on_disk HgPath),
   526     mut f: impl FnMut(&'on_disk HgPath),
   513 ) -> Result<(), DirstateV2ParseError> {
   527 ) -> Result<(), DirstateV2ParseError> {
   514     let root = read_root(on_disk)?;
   528     let root = read_root(on_disk)?;
   515     fn recur<'on_disk>(
   529     fn recur<'on_disk>(
   516         on_disk: &'on_disk [u8],
   530         on_disk: &'on_disk [u8],
   517         nodes: Slice,
   531         nodes: ChildNodes,
   518         f: &mut impl FnMut(&'on_disk HgPath),
   532         f: &mut impl FnMut(&'on_disk HgPath),
   519     ) -> Result<(), DirstateV2ParseError> {
   533     ) -> Result<(), DirstateV2ParseError> {
   520         for node in read_slice::<Node>(on_disk, nodes)? {
   534         for node in read_nodes(on_disk, nodes)? {
   521             if let Some(state) = node.state()? {
   535             if let Some(state) = node.state()? {
   522                 if state.is_tracked() {
   536                 if state.is_tracked() {
   523                     f(node.full_path(on_disk)?)
   537                     f(node.full_path(on_disk)?)
   524                 }
   538                 }
   525             }
   539             }
   563     out: &mut Vec<u8>,
   577     out: &mut Vec<u8>,
   564 ) -> Result<ChildNodes, DirstateError> {
   578 ) -> Result<ChildNodes, DirstateError> {
   565     // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
   579     // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
   566     // order. Sort to enable binary search in the written file.
   580     // order. Sort to enable binary search in the written file.
   567     let nodes = nodes.sorted();
   581     let nodes = nodes.sorted();
       
   582     let nodes_len = nodes.len();
   568 
   583 
   569     // First accumulate serialized nodes in a `Vec`
   584     // First accumulate serialized nodes in a `Vec`
   570     let mut on_disk_nodes = Vec::with_capacity(nodes.len());
   585     let mut on_disk_nodes = Vec::with_capacity(nodes_len);
   571     for node in nodes {
   586     for node in nodes {
   572         let children = write_nodes(
   587         let children = write_nodes(
   573             dirstate_map,
   588             dirstate_map,
   574             node.children(dirstate_map.on_disk)?,
   589             node.children(dirstate_map.on_disk)?,
   575             out,
   590             out,
   576         )?;
   591         )?;
   577         let full_path = node.full_path(dirstate_map.on_disk)?;
   592         let full_path = node.full_path(dirstate_map.on_disk)?;
   578         let full_path = write_slice::<u8>(full_path.as_bytes(), out);
   593         let full_path = write_path(full_path.as_bytes(), out);
   579         let copy_source =
   594         let copy_source =
   580             if let Some(source) = node.copy_source(dirstate_map.on_disk)? {
   595             if let Some(source) = node.copy_source(dirstate_map.on_disk)? {
   581                 write_slice::<u8>(source.as_bytes(), out)
   596                 write_path(source.as_bytes(), out)
   582             } else {
   597             } else {
   583                 Slice {
   598                 PathSlice {
   584                     start: 0.into(),
   599                     start: 0.into(),
   585                     len: 0.into(),
   600                     len: 0.into(),
   586                 }
   601                 }
   587             };
   602             };
   588         on_disk_nodes.push(match node {
   603         on_disk_nodes.push(match node {
   610                 };
   625                 };
   611                 Node {
   626                 Node {
   612                     children,
   627                     children,
   613                     copy_source,
   628                     copy_source,
   614                     full_path,
   629                     full_path,
   615                     base_name_start: u32::try_from(path.base_name_start())
   630                     base_name_start: u16::try_from(path.base_name_start())
   616                         // Could only panic for paths over 4 GiB
   631                         // Could only panic for paths over 64 KiB
   617                         .expect("dirstate-v2 offset overflow")
   632                         .expect("dirstate-v2 path length overflow")
   618                         .into(),
   633                         .into(),
   619                     descendants_with_entry_count: node
   634                     descendants_with_entry_count: node
   620                         .descendants_with_entry_count
   635                         .descendants_with_entry_count
   621                         .into(),
   636                         .into(),
   622                     tracked_descendants_count: node
   637                     tracked_descendants_count: node
   632                 full_path,
   647                 full_path,
   633                 ..*node
   648                 ..*node
   634             },
   649             },
   635         })
   650         })
   636     }
   651     }
   637     // … so we can write them contiguously
   652     // … so we can write them contiguously, after writing everything else they
   638     Ok(write_slice::<Node>(&on_disk_nodes, out))
   653     // refer to.
   639 }
   654     let start = current_offset(out);
   640 
   655     let len = u32::try_from(nodes_len)
   641 fn write_slice<T>(slice: &[T], out: &mut Vec<u8>) -> Slice
   656         // Could only panic with over 4 billion nodes
   642 where
   657         .expect("dirstate-v2 path length overflow")
   643     T: BytesCast,
   658         .into();
   644 {
   659     out.extend(on_disk_nodes.as_bytes());
   645     let start = u32::try_from(out.len())
   660     Ok(ChildNodes { start, len })
       
   661 }
       
   662 
       
   663 fn current_offset(out: &Vec<u8>) -> Offset {
       
   664     u32::try_from(out.len())
   646         // Could only panic for a dirstate file larger than 4 GiB
   665         // Could only panic for a dirstate file larger than 4 GiB
   647         .expect("dirstate-v2 offset overflow")
   666         .expect("dirstate-v2 offset overflow")
   648         .into();
   667         .into()
   649     let len = u32::try_from(slice.len())
   668 }
   650         // Could only panic for paths over 4 GiB or nodes with over 4 billions
   669 
   651         // child nodes
   670 fn write_path(slice: &[u8], out: &mut Vec<u8>) -> PathSlice {
   652         .expect("dirstate-v2 offset overflow")
   671     let start = current_offset(out);
       
   672     let len = u16::try_from(slice.len())
       
   673         // Could only panic for paths over 64 KiB
       
   674         .expect("dirstate-v2 path length overflow")
   653         .into();
   675         .into();
   654     out.extend(slice.as_bytes());
   676     out.extend(slice.as_bytes());
   655     Slice { start, len }
   677     PathSlice { start, len }
   656 }
   678 }