rust/hg-core/src/revlog/index.rs
changeset 51218 0112803e6c01
parent 51216 9f876765cbe2
child 51220 c817d9f626d3
equal deleted inserted replaced
51217:898674a4dbc7 51218:0112803e6c01
    18 };
    18 };
    19 
    19 
    20 pub const INDEX_ENTRY_SIZE: usize = 64;
    20 pub const INDEX_ENTRY_SIZE: usize = 64;
    21 pub const COMPRESSION_MODE_INLINE: u8 = 2;
    21 pub const COMPRESSION_MODE_INLINE: u8 = 2;
    22 
    22 
       
    23 #[derive(Debug)]
    23 pub struct IndexHeader {
    24 pub struct IndexHeader {
    24     pub(super) header_bytes: [u8; 4],
    25     pub(super) header_bytes: [u8; 4],
    25 }
    26 }
    26 
    27 
    27 #[derive(Copy, Clone)]
    28 #[derive(Copy, Clone)]
   518         let offset_override = if rev == Revision(0) { Some(0) } else { None };
   519         let offset_override = if rev == Revision(0) { Some(0) } else { None };
   519 
   520 
   520         IndexEntry {
   521         IndexEntry {
   521             bytes,
   522             bytes,
   522             offset_override,
   523             offset_override,
       
   524         }
       
   525     }
       
   526 
       
   527     fn null_entry(&self) -> IndexEntry {
       
   528         IndexEntry {
       
   529             bytes: &[0; INDEX_ENTRY_SIZE],
       
   530             offset_override: Some(0),
   523         }
   531         }
   524     }
   532     }
   525 
   533 
   526     /// Return the head revisions of this index
   534     /// Return the head revisions of this index
   527     pub fn head_revs(&mut self) -> Result<Vec<Revision>, GraphError> {
   535     pub fn head_revs(&mut self) -> Result<Vec<Revision>, GraphError> {
   590             let new_rev = if self.uses_generaldelta() {
   598             let new_rev = if self.uses_generaldelta() {
   591                 entry.base_revision_or_base_of_delta_chain()
   599                 entry.base_revision_or_base_of_delta_chain()
   592             } else {
   600             } else {
   593                 UncheckedRevision(current_rev.0 - 1)
   601                 UncheckedRevision(current_rev.0 - 1)
   594             };
   602             };
   595             if new_rev.0 == NULL_REVISION.0 {
       
   596                 break;
       
   597             }
       
   598             current_rev = self.check_revision(new_rev).ok_or_else(|| {
   603             current_rev = self.check_revision(new_rev).ok_or_else(|| {
   599                 HgError::corrupted(format!("Revision {new_rev} out of range"))
   604                 HgError::corrupted(format!("Revision {new_rev} out of range"))
   600             })?;
   605             })?;
       
   606             if current_rev.0 == NULL_REVISION.0 {
       
   607                 break;
       
   608             }
   601             entry = self.get_entry(current_rev).unwrap()
   609             entry = self.get_entry(current_rev).unwrap()
   602         }
   610         }
   603 
   611 
   604         let stopped = if stop_rev.map(|r| current_rev == r).unwrap_or(false) {
   612         let stopped = if stop_rev.map(|r| current_rev == r).unwrap_or(false) {
   605             true
   613             true
   743         let rev = self
   751         let rev = self
   744             .check_revision(rev)
   752             .check_revision(rev)
   745             .ok_or_else(|| RevlogError::corrupted("test"))?;
   753             .ok_or_else(|| RevlogError::corrupted("test"))?;
   746         self.is_snapshot_unchecked(rev)
   754         self.is_snapshot_unchecked(rev)
   747     }
   755     }
       
   756 
       
   757     /// Slice revs to reduce the amount of unrelated data to be read from disk.
       
   758     ///
       
   759     /// The index is sliced into groups that should be read in one time.
       
   760     ///
       
   761     /// The initial chunk is sliced until the overall density
       
   762     /// (payload/chunks-span ratio) is above `target_density`.
       
   763     /// No gap smaller than `min_gap_size` is skipped.
       
   764     pub fn slice_chunk_to_density(
       
   765         &self,
       
   766         revs: &[Revision],
       
   767         target_density: f64,
       
   768         min_gap_size: usize,
       
   769     ) -> Vec<Vec<Revision>> {
       
   770         if revs.is_empty() {
       
   771             return vec![];
       
   772         }
       
   773         if revs.len() == 1 {
       
   774             return vec![revs.to_owned()];
       
   775         }
       
   776         let delta_chain_span = self.segment_span(revs);
       
   777         if delta_chain_span < min_gap_size {
       
   778             return vec![revs.to_owned()];
       
   779         }
       
   780         let entries: Vec<_> = revs
       
   781             .iter()
       
   782             .map(|r| {
       
   783                 (*r, self.get_entry(*r).unwrap_or_else(|| self.null_entry()))
       
   784             })
       
   785             .collect();
       
   786 
       
   787         let mut read_data = delta_chain_span;
       
   788         let chain_payload: u32 =
       
   789             entries.iter().map(|(_r, e)| e.compressed_len()).sum();
       
   790         let mut density = if delta_chain_span > 0 {
       
   791             chain_payload as f64 / delta_chain_span as f64
       
   792         } else {
       
   793             1.0
       
   794         };
       
   795 
       
   796         if density >= target_density {
       
   797             return vec![revs.to_owned()];
       
   798         }
       
   799 
       
   800         // Store the gaps in a heap to have them sorted by decreasing size
       
   801         let mut gaps = Vec::new();
       
   802         let mut previous_end = None;
       
   803 
       
   804         for (i, (_rev, entry)) in entries.iter().enumerate() {
       
   805             let start = entry.c_start() as usize;
       
   806             let length = entry.compressed_len();
       
   807 
       
   808             // Skip empty revisions to form larger holes
       
   809             if length == 0 {
       
   810                 continue;
       
   811             }
       
   812 
       
   813             if let Some(end) = previous_end {
       
   814                 let gap_size = start - end;
       
   815                 // Only consider holes that are large enough
       
   816                 if gap_size > min_gap_size {
       
   817                     gaps.push((gap_size, i));
       
   818                 }
       
   819             }
       
   820             previous_end = Some(start + length as usize);
       
   821         }
       
   822         if gaps.is_empty() {
       
   823             return vec![revs.to_owned()];
       
   824         }
       
   825         // sort the gaps to pop them from largest to small
       
   826         gaps.sort_unstable();
       
   827 
       
   828         // Collect the indices of the largest holes until
       
   829         // the density is acceptable
       
   830         let mut selected = vec![];
       
   831         while let Some((gap_size, gap_id)) = gaps.pop() {
       
   832             if density >= target_density {
       
   833                 break;
       
   834             }
       
   835             selected.push(gap_id);
       
   836 
       
   837             // The gap sizes are stored as negatives to be sorted decreasingly
       
   838             // by the heap
       
   839             read_data -= gap_size;
       
   840             density = if read_data > 0 {
       
   841                 chain_payload as f64 / read_data as f64
       
   842             } else {
       
   843                 1.0
       
   844             };
       
   845             if density >= target_density {
       
   846                 break;
       
   847             }
       
   848         }
       
   849         selected.sort_unstable();
       
   850         selected.push(revs.len());
       
   851 
       
   852         // Cut the revs at collected indices
       
   853         let mut previous_idx = 0;
       
   854         let mut chunks = vec![];
       
   855         for idx in selected {
       
   856             let chunk = self.trim_chunk(&entries, previous_idx, idx);
       
   857             if !chunk.is_empty() {
       
   858                 chunks.push(chunk.iter().map(|(rev, _entry)| *rev).collect());
       
   859             }
       
   860             previous_idx = idx;
       
   861         }
       
   862         let chunk = self.trim_chunk(&entries, previous_idx, entries.len());
       
   863         if !chunk.is_empty() {
       
   864             chunks.push(chunk.iter().map(|(rev, _entry)| *rev).collect());
       
   865         }
       
   866 
       
   867         chunks
       
   868     }
       
   869 
       
   870     /// Get the byte span of a segment of sorted revisions.
       
   871     ///
       
   872     /// Occurrences of [`NULL_REVISION`] are ignored at the beginning of
       
   873     /// the `revs` segment.
       
   874     ///
       
   875     /// panics:
       
   876     ///  - if `revs` is empty or only made of `NULL_REVISION`
       
   877     ///  - if cannot retrieve entry for the last or first not null element of
       
   878     ///    `revs`.
       
   879     fn segment_span(&self, revs: &[Revision]) -> usize {
       
   880         if revs.is_empty() {
       
   881             return 0;
       
   882         }
       
   883         let last_entry = &self.get_entry(revs[revs.len() - 1]).unwrap();
       
   884         let end = last_entry.c_start() + last_entry.compressed_len() as u64;
       
   885         let first_rev = revs.iter().find(|r| r.0 != NULL_REVISION.0).unwrap();
       
   886         let start = if (*first_rev).0 == 0 {
       
   887             0
       
   888         } else {
       
   889             self.get_entry(*first_rev).unwrap().c_start()
       
   890         };
       
   891         (end - start) as usize
       
   892     }
       
   893 
       
   894     /// Returns `&revs[startidx..endidx]` without empty trailing revs
       
   895     fn trim_chunk<'a>(
       
   896         &'a self,
       
   897         revs: &'a [(Revision, IndexEntry)],
       
   898         start: usize,
       
   899         mut end: usize,
       
   900     ) -> &'a [(Revision, IndexEntry)] {
       
   901         // Trim empty revs at the end, except the very first rev of a chain
       
   902         let last_rev = revs[end - 1].0;
       
   903         if last_rev.0 < self.len() as BaseRevision {
       
   904             while end > 1
       
   905                 && end > start
       
   906                 && revs[end - 1].1.compressed_len() == 0
       
   907             {
       
   908                 end -= 1
       
   909             }
       
   910         }
       
   911         &revs[start..end]
       
   912     }
   748 }
   913 }
   749 fn inline_scan(bytes: &[u8]) -> (usize, Vec<usize>) {
   914 fn inline_scan(bytes: &[u8]) -> (usize, Vec<usize>) {
   750     let mut offset: usize = 0;
   915     let mut offset: usize = 0;
   751     let mut offsets = Vec::new();
   916     let mut offsets = Vec::new();
   752 
   917 
   803             BigEndian::read_u64(&bytes[..]) as usize
   968             BigEndian::read_u64(&bytes[..]) as usize
   804         }
   969         }
   805     }
   970     }
   806     pub fn raw_offset(&self) -> u64 {
   971     pub fn raw_offset(&self) -> u64 {
   807         BigEndian::read_u64(&self.bytes[0..8])
   972         BigEndian::read_u64(&self.bytes[0..8])
       
   973     }
       
   974 
       
   975     /// Same result (except potentially for rev 0) as C `index_get_start()`
       
   976     fn c_start(&self) -> u64 {
       
   977         self.raw_offset() >> 16
   808     }
   978     }
   809 
   979 
   810     pub fn flags(&self) -> u16 {
   980     pub fn flags(&self) -> u16 {
   811         BigEndian::read_u16(&self.bytes[6..=7])
   981         BigEndian::read_u16(&self.bytes[6..=7])
   812     }
   982     }