24 use crate::utils::hg_path::HgPath; |
24 use crate::utils::hg_path::HgPath; |
25 use crate::DirstateEntry; |
25 use crate::DirstateEntry; |
26 use crate::DirstateError; |
26 use crate::DirstateError; |
27 use crate::DirstateParents; |
27 use crate::DirstateParents; |
28 use crate::EntryState; |
28 use crate::EntryState; |
29 use bytes_cast::unaligned::{I32Be, I64Be, U32Be}; |
29 use bytes_cast::unaligned::{I32Be, I64Be, U16Be, U32Be}; |
30 use bytes_cast::BytesCast; |
30 use bytes_cast::BytesCast; |
31 use format_bytes::format_bytes; |
31 use format_bytes::format_bytes; |
32 use std::borrow::Cow; |
32 use std::borrow::Cow; |
33 use std::convert::{TryFrom, TryInto}; |
33 use std::convert::{TryFrom, TryInto}; |
34 use std::time::{Duration, SystemTime, UNIX_EPOCH}; |
34 use std::time::{Duration, SystemTime, UNIX_EPOCH}; |
52 #[repr(C)] |
52 #[repr(C)] |
53 struct DocketHeader { |
53 struct DocketHeader { |
54 marker: [u8; V2_FORMAT_MARKER.len()], |
54 marker: [u8; V2_FORMAT_MARKER.len()], |
55 parent_1: [u8; STORED_NODE_ID_BYTES], |
55 parent_1: [u8; STORED_NODE_ID_BYTES], |
56 parent_2: [u8; STORED_NODE_ID_BYTES], |
56 parent_2: [u8; STORED_NODE_ID_BYTES], |
|
57 |
|
58 /// Counted in bytes |
57 data_size: Size, |
59 data_size: Size, |
|
60 |
58 uuid_size: u8, |
61 uuid_size: u8, |
59 } |
62 } |
60 |
63 |
61 pub struct Docket<'on_disk> { |
64 pub struct Docket<'on_disk> { |
62 header: &'on_disk DocketHeader, |
65 header: &'on_disk DocketHeader, |
96 #[repr(C)] |
99 #[repr(C)] |
97 pub(super) struct Node { |
100 pub(super) struct Node { |
98 full_path: PathSlice, |
101 full_path: PathSlice, |
99 |
102 |
100 /// In bytes from `self.full_path.start` |
103 /// In bytes from `self.full_path.start` |
101 base_name_start: Size, |
104 base_name_start: PathSize, |
102 |
105 |
103 copy_source: OptPathSlice, |
106 copy_source: OptPathSlice, |
104 children: ChildNodes, |
107 children: ChildNodes, |
105 pub(super) descendants_with_entry_count: Size, |
108 pub(super) descendants_with_entry_count: Size, |
106 pub(super) tracked_descendants_count: Size, |
109 pub(super) tracked_descendants_count: Size, |
165 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB. |
168 /// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB. |
166 type Offset = U32Be; |
169 type Offset = U32Be; |
167 |
170 |
168 /// Counted in number of items |
171 /// Counted in number of items |
169 /// |
172 /// |
170 /// NOTE: not supporting directories with more than 4 billion direct children, |
173 /// NOTE: we choose not to support counting more than 4 billion nodes anywhere. |
171 /// or filenames more than 4 GiB. |
|
172 type Size = U32Be; |
174 type Size = U32Be; |
173 |
175 |
174 /// Location of consecutive, fixed-size items. |
176 /// Counted in bytes |
175 /// |
177 /// |
176 /// An item can be a single byte for paths, or a struct with |
178 /// NOTE: we choose not to support file names/paths longer than 64 KiB. |
177 /// `derive(BytesCast)`. |
179 type PathSize = U16Be; |
178 #[derive(BytesCast, Copy, Clone)] |
|
179 #[repr(C)] |
|
180 struct Slice { |
|
181 start: Offset, |
|
182 len: Size, |
|
183 } |
|
184 |
180 |
185 /// A contiguous sequence of `len` times `Node`, representing the child nodes |
181 /// A contiguous sequence of `len` times `Node`, representing the child nodes |
186 /// of either some other node or of the repository root. |
182 /// of either some other node or of the repository root. |
187 /// |
183 /// |
188 /// Always sorted by ascending `full_path`, to allow binary search. |
184 /// Always sorted by ascending `full_path`, to allow binary search. |
189 /// Since nodes with the same parent nodes also have the same parent path, |
185 /// Since nodes with the same parent nodes also have the same parent path, |
190 /// only the `base_name`s need to be compared during binary search. |
186 /// only the `base_name`s need to be compared during binary search. |
191 type ChildNodes = Slice; |
187 #[derive(BytesCast, Copy, Clone)] |
|
188 #[repr(C)] |
|
189 struct ChildNodes { |
|
190 start: Offset, |
|
191 len: Size, |
|
192 } |
192 |
193 |
193 /// A `HgPath` of `len` bytes |
194 /// A `HgPath` of `len` bytes |
194 type PathSlice = Slice; |
195 #[derive(BytesCast, Copy, Clone)] |
|
196 #[repr(C)] |
|
197 struct PathSlice { |
|
198 start: Offset, |
|
199 len: PathSize, |
|
200 } |
195 |
201 |
196 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes |
202 /// Either nothing if `start == 0`, or a `HgPath` of `len` bytes |
197 type OptPathSlice = Slice; |
203 type OptPathSlice = PathSlice; |
198 |
204 |
199 /// Make sure that size-affecting changes are made knowingly |
205 /// Make sure that size-affecting changes are made knowingly |
200 fn _static_assert_size_of() { |
206 fn _static_assert_size_of() { |
201 let _ = std::mem::transmute::<DocketHeader, [u8; 81]>; |
207 let _ = std::mem::transmute::<DocketHeader, [u8; 81]>; |
202 let _ = std::mem::transmute::<Root, [u8; 36]>; |
208 let _ = std::mem::transmute::<Root, [u8; 36]>; |
203 let _ = std::mem::transmute::<Node, [u8; 49]>; |
209 let _ = std::mem::transmute::<Node, [u8; 43]>; |
204 } |
210 } |
205 |
211 |
206 /// Unexpected file format found in `.hg/dirstate` with the "v2" format. |
212 /// Unexpected file format found in `.hg/dirstate` with the "v2" format. |
207 /// |
213 /// |
208 /// This should only happen if Mercurial is buggy or a repository is corrupted. |
214 /// This should only happen if Mercurial is buggy or a repository is corrupted. |
277 return Ok(DirstateMap::empty(on_disk)); |
283 return Ok(DirstateMap::empty(on_disk)); |
278 } |
284 } |
279 let root = read_root(on_disk)?; |
285 let root = read_root(on_disk)?; |
280 let dirstate_map = DirstateMap { |
286 let dirstate_map = DirstateMap { |
281 on_disk, |
287 on_disk, |
282 root: dirstate_map::ChildNodes::OnDisk(read_slice::<Node>( |
288 root: dirstate_map::ChildNodes::OnDisk(read_nodes( |
283 on_disk, |
289 on_disk, |
284 root.root_nodes, |
290 root.root_nodes, |
285 )?), |
291 )?), |
286 nodes_with_entry_count: root.nodes_with_entry_count.get(), |
292 nodes_with_entry_count: root.nodes_with_entry_count.get(), |
287 nodes_with_copy_source_count: root.nodes_with_copy_source_count.get(), |
293 nodes_with_copy_source_count: root.nodes_with_copy_source_count.get(), |
406 |
412 |
407 pub(super) fn children<'on_disk>( |
413 pub(super) fn children<'on_disk>( |
408 &self, |
414 &self, |
409 on_disk: &'on_disk [u8], |
415 on_disk: &'on_disk [u8], |
410 ) -> Result<&'on_disk [Node], DirstateV2ParseError> { |
416 ) -> Result<&'on_disk [Node], DirstateV2ParseError> { |
411 read_slice::<Node>(on_disk, self.children) |
417 read_nodes(on_disk, self.children) |
412 } |
418 } |
413 |
419 |
414 pub(super) fn to_in_memory_node<'on_disk>( |
420 pub(super) fn to_in_memory_node<'on_disk>( |
415 &self, |
421 &self, |
416 on_disk: &'on_disk [u8], |
422 on_disk: &'on_disk [u8], |
481 } |
487 } |
482 } |
488 } |
483 |
489 |
484 fn read_hg_path( |
490 fn read_hg_path( |
485 on_disk: &[u8], |
491 on_disk: &[u8], |
486 slice: Slice, |
492 slice: PathSlice, |
487 ) -> Result<&HgPath, DirstateV2ParseError> { |
493 ) -> Result<&HgPath, DirstateV2ParseError> { |
488 let bytes = read_slice::<u8>(on_disk, slice)?; |
494 read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new) |
489 Ok(HgPath::new(bytes)) |
495 } |
490 } |
496 |
491 |
497 fn read_nodes( |
492 fn read_slice<T>( |
|
493 on_disk: &[u8], |
498 on_disk: &[u8], |
494 slice: Slice, |
499 slice: ChildNodes, |
|
500 ) -> Result<&[Node], DirstateV2ParseError> { |
|
501 read_slice(on_disk, slice.start, slice.len.get()) |
|
502 } |
|
503 |
|
504 fn read_slice<T, Len>( |
|
505 on_disk: &[u8], |
|
506 start: Offset, |
|
507 len: Len, |
495 ) -> Result<&[T], DirstateV2ParseError> |
508 ) -> Result<&[T], DirstateV2ParseError> |
496 where |
509 where |
497 T: BytesCast, |
510 T: BytesCast, |
|
511 Len: TryInto<usize>, |
498 { |
512 { |
499 // Either `usize::MAX` would result in "out of bounds" error since a single |
513 // Either `usize::MAX` would result in "out of bounds" error since a single |
500 // `&[u8]` cannot occupy the entire addess space. |
514 // `&[u8]` cannot occupy the entire addess space. |
501 let start = usize::try_from(slice.start.get()).unwrap_or(std::usize::MAX); |
515 let start = start.get().try_into().unwrap_or(std::usize::MAX); |
502 let len = usize::try_from(slice.len.get()).unwrap_or(std::usize::MAX); |
516 let len = len.try_into().unwrap_or(std::usize::MAX); |
503 on_disk |
517 on_disk |
504 .get(start..) |
518 .get(start..) |
505 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok()) |
519 .and_then(|bytes| T::slice_from_bytes(bytes, len).ok()) |
506 .map(|(slice, _rest)| slice) |
520 .map(|(slice, _rest)| slice) |
507 .ok_or_else(|| DirstateV2ParseError) |
521 .ok_or_else(|| DirstateV2ParseError) |
512 mut f: impl FnMut(&'on_disk HgPath), |
526 mut f: impl FnMut(&'on_disk HgPath), |
513 ) -> Result<(), DirstateV2ParseError> { |
527 ) -> Result<(), DirstateV2ParseError> { |
514 let root = read_root(on_disk)?; |
528 let root = read_root(on_disk)?; |
515 fn recur<'on_disk>( |
529 fn recur<'on_disk>( |
516 on_disk: &'on_disk [u8], |
530 on_disk: &'on_disk [u8], |
517 nodes: Slice, |
531 nodes: ChildNodes, |
518 f: &mut impl FnMut(&'on_disk HgPath), |
532 f: &mut impl FnMut(&'on_disk HgPath), |
519 ) -> Result<(), DirstateV2ParseError> { |
533 ) -> Result<(), DirstateV2ParseError> { |
520 for node in read_slice::<Node>(on_disk, nodes)? { |
534 for node in read_nodes(on_disk, nodes)? { |
521 if let Some(state) = node.state()? { |
535 if let Some(state) = node.state()? { |
522 if state.is_tracked() { |
536 if state.is_tracked() { |
523 f(node.full_path(on_disk)?) |
537 f(node.full_path(on_disk)?) |
524 } |
538 } |
525 } |
539 } |
563 out: &mut Vec<u8>, |
577 out: &mut Vec<u8>, |
564 ) -> Result<ChildNodes, DirstateError> { |
578 ) -> Result<ChildNodes, DirstateError> { |
565 // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration |
579 // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration |
566 // order. Sort to enable binary search in the written file. |
580 // order. Sort to enable binary search in the written file. |
567 let nodes = nodes.sorted(); |
581 let nodes = nodes.sorted(); |
|
582 let nodes_len = nodes.len(); |
568 |
583 |
569 // First accumulate serialized nodes in a `Vec` |
584 // First accumulate serialized nodes in a `Vec` |
570 let mut on_disk_nodes = Vec::with_capacity(nodes.len()); |
585 let mut on_disk_nodes = Vec::with_capacity(nodes_len); |
571 for node in nodes { |
586 for node in nodes { |
572 let children = write_nodes( |
587 let children = write_nodes( |
573 dirstate_map, |
588 dirstate_map, |
574 node.children(dirstate_map.on_disk)?, |
589 node.children(dirstate_map.on_disk)?, |
575 out, |
590 out, |
576 )?; |
591 )?; |
577 let full_path = node.full_path(dirstate_map.on_disk)?; |
592 let full_path = node.full_path(dirstate_map.on_disk)?; |
578 let full_path = write_slice::<u8>(full_path.as_bytes(), out); |
593 let full_path = write_path(full_path.as_bytes(), out); |
579 let copy_source = |
594 let copy_source = |
580 if let Some(source) = node.copy_source(dirstate_map.on_disk)? { |
595 if let Some(source) = node.copy_source(dirstate_map.on_disk)? { |
581 write_slice::<u8>(source.as_bytes(), out) |
596 write_path(source.as_bytes(), out) |
582 } else { |
597 } else { |
583 Slice { |
598 PathSlice { |
584 start: 0.into(), |
599 start: 0.into(), |
585 len: 0.into(), |
600 len: 0.into(), |
586 } |
601 } |
587 }; |
602 }; |
588 on_disk_nodes.push(match node { |
603 on_disk_nodes.push(match node { |
632 full_path, |
647 full_path, |
633 ..*node |
648 ..*node |
634 }, |
649 }, |
635 }) |
650 }) |
636 } |
651 } |
637 // … so we can write them contiguously |
652 // … so we can write them contiguously, after writing everything else they |
638 Ok(write_slice::<Node>(&on_disk_nodes, out)) |
653 // refer to. |
639 } |
654 let start = current_offset(out); |
640 |
655 let len = u32::try_from(nodes_len) |
641 fn write_slice<T>(slice: &[T], out: &mut Vec<u8>) -> Slice |
656 // Could only panic with over 4 billion nodes |
642 where |
657 .expect("dirstate-v2 path length overflow") |
643 T: BytesCast, |
658 .into(); |
644 { |
659 out.extend(on_disk_nodes.as_bytes()); |
645 let start = u32::try_from(out.len()) |
660 Ok(ChildNodes { start, len }) |
|
661 } |
|
662 |
|
663 fn current_offset(out: &Vec<u8>) -> Offset { |
|
664 u32::try_from(out.len()) |
646 // Could only panic for a dirstate file larger than 4 GiB |
665 // Could only panic for a dirstate file larger than 4 GiB |
647 .expect("dirstate-v2 offset overflow") |
666 .expect("dirstate-v2 offset overflow") |
648 .into(); |
667 .into() |
649 let len = u32::try_from(slice.len()) |
668 } |
650 // Could only panic for paths over 4 GiB or nodes with over 4 billions |
669 |
651 // child nodes |
670 fn write_path(slice: &[u8], out: &mut Vec<u8>) -> PathSlice { |
652 .expect("dirstate-v2 offset overflow") |
671 let start = current_offset(out); |
|
672 let len = u16::try_from(slice.len()) |
|
673 // Could only panic for paths over 64 KiB |
|
674 .expect("dirstate-v2 path length overflow") |
653 .into(); |
675 .into(); |
654 out.extend(slice.as_bytes()); |
676 out.extend(slice.as_bytes()); |
655 Slice { start, len } |
677 PathSlice { start, len } |
656 } |
678 } |