rust/hg-core/src/copy_tracing.rs
changeset 46057 e0313b0a6f7e
parent 45975 2367937982ba
child 46058 12192fdbf3ac
equal deleted inserted replaced
46056:c407513a44a3 46057:e0313b0a6f7e
     3 use crate::Revision;
     3 use crate::Revision;
     4 
     4 
     5 use im_rc::ordmap::DiffItem;
     5 use im_rc::ordmap::DiffItem;
     6 use im_rc::ordmap::OrdMap;
     6 use im_rc::ordmap::OrdMap;
     7 
     7 
       
     8 use std::cmp::Ordering;
     8 use std::collections::HashMap;
     9 use std::collections::HashMap;
     9 use std::collections::HashSet;
    10 use std::convert::TryInto;
    10 
    11 
    11 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
    12 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
    12 
    13 
    13 #[derive(Clone, Debug, PartialEq)]
    14 #[derive(Clone, Debug, PartialEq)]
    14 struct TimeStampedPathCopy {
    15 struct TimeStampedPathCopy {
    21 
    22 
    22 /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
    23 /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
    23 type TimeStampedPathCopies = OrdMap<HgPathBuf, TimeStampedPathCopy>;
    24 type TimeStampedPathCopies = OrdMap<HgPathBuf, TimeStampedPathCopy>;
    24 
    25 
    25 /// hold parent 1, parent 2 and relevant files actions.
    26 /// hold parent 1, parent 2 and relevant files actions.
    26 pub type RevInfo = (Revision, Revision, ChangedFiles);
    27 pub type RevInfo<'a> = (Revision, Revision, ChangedFiles<'a>);
    27 
    28 
    28 /// represent the files affected by a changesets
    29 /// represent the files affected by a changesets
    29 ///
    30 ///
    30 /// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
    31 /// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
    31 /// all the data categories tracked by it.
    32 /// all the data categories tracked by it.
    32 pub struct ChangedFiles {
    33 /// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
    33     removed: HashSet<HgPathBuf>,
    34 /// all the data categories tracked by it.
    34     merged: HashSet<HgPathBuf>,
    35 pub struct ChangedFiles<'a> {
    35     salvaged: HashSet<HgPathBuf>,
    36     nb_items: u32,
    36     copied_from_p1: PathCopies,
    37     index: &'a [u8],
    37     copied_from_p2: PathCopies,
    38     data: &'a [u8],
    38 }
    39 }
    39 
    40 
    40 /// Represent active changes that affect the copy tracing.
    41 /// Represent active changes that affect the copy tracing.
    41 enum Action<'a> {
    42 enum Action<'a> {
    42     /// The parent ? children edge is removing a file
    43     /// The parent ? children edge is removing a file
    60     Salvaged,
    61     Salvaged,
    61     /// Normal: Not one of the two cases above
    62     /// Normal: Not one of the two cases above
    62     Normal,
    63     Normal,
    63 }
    64 }
    64 
    65 
    65 impl ChangedFiles {
    66 type FileChange<'a> = (u8, &'a HgPath, &'a HgPath);
    66     pub fn new(
    67 
    67         removed: HashSet<HgPathBuf>,
    68 const EMPTY: &[u8] = b"";
    68         merged: HashSet<HgPathBuf>,
    69 const COPY_MASK: u8 = 3;
    69         salvaged: HashSet<HgPathBuf>,
    70 const P1_COPY: u8 = 2;
    70         copied_from_p1: PathCopies,
    71 const P2_COPY: u8 = 3;
    71         copied_from_p2: PathCopies,
    72 const ACTION_MASK: u8 = 28;
    72     ) -> Self {
    73 const REMOVED: u8 = 12;
    73         ChangedFiles {
    74 const MERGED: u8 = 8;
    74             removed,
    75 const SALVAGED: u8 = 16;
    75             merged,
    76 
    76             salvaged,
    77 impl<'a> ChangedFiles<'a> {
    77             copied_from_p1,
    78     const INDEX_START: usize = 4;
    78             copied_from_p2,
    79     const ENTRY_SIZE: u32 = 9;
    79         }
    80     const FILENAME_START: u32 = 1;
       
    81     const COPY_SOURCE_START: u32 = 5;
       
    82 
       
    83     pub fn new(data: &'a [u8]) -> Self {
       
    84         assert!(
       
    85             data.len() >= 4,
       
    86             "data size ({}) is too small to contain the header (4)",
       
    87             data.len()
       
    88         );
       
    89         let nb_items_raw: [u8; 4] = (&data[0..=3])
       
    90             .try_into()
       
    91             .expect("failed to turn 4 bytes into 4 bytes");
       
    92         let nb_items = u32::from_be_bytes(nb_items_raw);
       
    93 
       
    94         let index_size = (nb_items * Self::ENTRY_SIZE) as usize;
       
    95         let index_end = Self::INDEX_START + index_size;
       
    96 
       
    97         assert!(
       
    98             data.len() >= index_end,
       
    99             "data size ({}) is too small to fit the index_data ({})",
       
   100             data.len(),
       
   101             index_end
       
   102         );
       
   103 
       
   104         let ret = ChangedFiles {
       
   105             nb_items,
       
   106             index: &data[Self::INDEX_START..index_end],
       
   107             data: &data[index_end..],
       
   108         };
       
   109         let max_data = ret.filename_end(nb_items - 1) as usize;
       
   110         assert!(
       
   111             ret.data.len() >= max_data,
       
   112             "data size ({}) is too small to fit all data ({})",
       
   113             data.len(),
       
   114             index_end + max_data
       
   115         );
       
   116         ret
    80     }
   117     }
    81 
   118 
    82     pub fn new_empty() -> Self {
   119     pub fn new_empty() -> Self {
    83         ChangedFiles {
   120         ChangedFiles {
    84             removed: HashSet::new(),
   121             nb_items: 0,
    85             merged: HashSet::new(),
   122             index: EMPTY,
    86             salvaged: HashSet::new(),
   123             data: EMPTY,
    87             copied_from_p1: PathCopies::new(),
   124         }
    88             copied_from_p2: PathCopies::new(),
   125     }
    89         }
   126 
       
   127     /// internal function to return an individual entry at a given index
       
   128     fn entry(&'a self, idx: u32) -> FileChange<'a> {
       
   129         if idx >= self.nb_items {
       
   130             panic!(
       
   131                 "index for entry is higher that the number of file {} >= {}",
       
   132                 idx, self.nb_items
       
   133             )
       
   134         }
       
   135         let flags = self.flags(idx);
       
   136         let filename = self.filename(idx);
       
   137         let copy_idx = self.copy_idx(idx);
       
   138         let copy_source = self.filename(copy_idx);
       
   139         (flags, filename, copy_source)
       
   140     }
       
   141 
       
   142     /// internal function to return the filename of the entry at a given index
       
   143     fn filename(&self, idx: u32) -> &HgPath {
       
   144         let filename_start;
       
   145         if idx == 0 {
       
   146             filename_start = 0;
       
   147         } else {
       
   148             filename_start = self.filename_end(idx - 1)
       
   149         }
       
   150         let filename_end = self.filename_end(idx);
       
   151         let filename_start = filename_start as usize;
       
   152         let filename_end = filename_end as usize;
       
   153         HgPath::new(&self.data[filename_start..filename_end])
       
   154     }
       
   155 
       
   156     /// internal function to return the flag field of the entry at a given
       
   157     /// index
       
   158     fn flags(&self, idx: u32) -> u8 {
       
   159         let idx = idx as usize;
       
   160         self.index[idx * (Self::ENTRY_SIZE as usize)]
       
   161     }
       
   162 
       
   163     /// internal function to return the end of a filename part at a given index
       
   164     fn filename_end(&self, idx: u32) -> u32 {
       
   165         let start = (idx * Self::ENTRY_SIZE) + Self::FILENAME_START;
       
   166         let end = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
       
   167         let start = start as usize;
       
   168         let end = end as usize;
       
   169         let raw = (&self.index[start..end])
       
   170             .try_into()
       
   171             .expect("failed to turn 4 bytes into 4 bytes");
       
   172         u32::from_be_bytes(raw)
       
   173     }
       
   174 
       
   175     /// internal function to return index of the copy source of the entry at a
       
   176     /// given index
       
   177     fn copy_idx(&self, idx: u32) -> u32 {
       
   178         let start = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
       
   179         let end = (idx + 1) * Self::ENTRY_SIZE;
       
   180         let start = start as usize;
       
   181         let end = end as usize;
       
   182         let raw = (&self.index[start..end])
       
   183             .try_into()
       
   184             .expect("failed to turn 4 bytes into 4 bytes");
       
   185         u32::from_be_bytes(raw)
    90     }
   186     }
    91 
   187 
    92     /// Return an iterator over all the `Action` in this instance.
   188     /// Return an iterator over all the `Action` in this instance.
    93     fn iter_actions(&self, parent: usize) -> impl Iterator<Item = Action> {
   189     fn iter_actions(&self, parent: usize) -> ActionsIterator {
    94         let copies_iter = match parent {
   190         ActionsIterator {
    95             1 => self.copied_from_p1.iter(),
   191             changes: &self,
    96             2 => self.copied_from_p2.iter(),
   192             parent: parent,
    97             _ => unreachable!(),
   193             current: 0,
    98         };
   194         }
    99         let remove_iter = self.removed.iter();
       
   100         let copies_iter = copies_iter.map(|(x, y)| Action::Copied(x, y));
       
   101         let remove_iter = remove_iter.map(|x| Action::Removed(x));
       
   102         copies_iter.chain(remove_iter)
       
   103     }
   195     }
   104 
   196 
   105     /// return the MergeCase value associated with a filename
   197     /// return the MergeCase value associated with a filename
   106     fn get_merge_case(&self, path: &HgPath) -> MergeCase {
   198     fn get_merge_case(&self, path: &HgPath) -> MergeCase {
   107         if self.salvaged.contains(path) {
   199         if self.nb_items == 0 {
   108             return MergeCase::Salvaged;
       
   109         } else if self.merged.contains(path) {
       
   110             return MergeCase::Merged;
       
   111         } else {
       
   112             return MergeCase::Normal;
   200             return MergeCase::Normal;
   113         }
   201         }
       
   202         let mut low_part = 0;
       
   203         let mut high_part = self.nb_items;
       
   204 
       
   205         while low_part < high_part {
       
   206             let cursor = (low_part + high_part - 1) / 2;
       
   207             let (flags, filename, _source) = self.entry(cursor);
       
   208             match path.cmp(filename) {
       
   209                 Ordering::Less => low_part = cursor + 1,
       
   210                 Ordering::Greater => high_part = cursor,
       
   211                 Ordering::Equal => {
       
   212                     return match flags & ACTION_MASK {
       
   213                         MERGED => MergeCase::Merged,
       
   214                         SALVAGED => MergeCase::Salvaged,
       
   215                         _ => MergeCase::Normal,
       
   216                     };
       
   217                 }
       
   218             }
       
   219         }
       
   220         MergeCase::Normal
   114     }
   221     }
   115 }
   222 }
   116 
   223 
   117 /// A struct responsible for answering "is X ancestors of Y" quickly
   224 /// A struct responsible for answering "is X ancestors of Y" quickly
   118 ///
   225 ///
   148             }
   255             }
   149         }
   256         }
   150     }
   257     }
   151 }
   258 }
   152 
   259 
       
   260 struct ActionsIterator<'a> {
       
   261     changes: &'a ChangedFiles<'a>,
       
   262     parent: usize,
       
   263     current: u32,
       
   264 }
       
   265 
       
   266 impl<'a> Iterator for ActionsIterator<'a> {
       
   267     type Item = Action<'a>;
       
   268 
       
   269     fn next(&mut self) -> Option<Action<'a>> {
       
   270         while self.current < self.changes.nb_items {
       
   271             let (flags, file, source) = self.changes.entry(self.current);
       
   272             self.current += 1;
       
   273             if (flags & ACTION_MASK) == REMOVED {
       
   274                 return Some(Action::Removed(file));
       
   275             }
       
   276             let copy = flags & COPY_MASK;
       
   277             if self.parent == 1 && copy == P1_COPY {
       
   278                 return Some(Action::Copied(file, source));
       
   279             }
       
   280             if self.parent == 2 && copy == P2_COPY {
       
   281                 return Some(Action::Copied(file, source));
       
   282             }
       
   283         }
       
   284         return None;
       
   285     }
       
   286 }
       
   287 
       
   288 /// A small struct whose purpose is to ensure lifetime of bytes referenced in
       
   289 /// ChangedFiles
       
   290 ///
       
   291 /// It is passed to the RevInfoMaker callback who can assign any necessary
       
   292 /// content to the `data` attribute. The copy tracing code is responsible for
       
   293 /// keeping the DataHolder alive at least as long as the ChangedFiles object.
       
   294 pub struct DataHolder<D> {
       
   295     /// RevInfoMaker callback should assign data referenced by the
       
   296     /// ChangedFiles struct it return to this attribute. The DataHolder
       
   297     /// lifetime will be at least as long as the ChangedFiles one.
       
   298     pub data: Option<D>,
       
   299 }
       
   300 
       
   301 pub type RevInfoMaker<'a, D> =
       
   302     Box<dyn for<'r> Fn(Revision, &'r mut DataHolder<D>) -> RevInfo<'r> + 'a>;
       
   303 
   153 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
   304 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
   154 ///
   305 ///
   155 /// Arguments are:
   306 /// Arguments are:
   156 ///
   307 ///
   157 /// revs: all revisions to be considered
   308 /// revs: all revisions to be considered
   161 ///   * first parent
   312 ///   * first parent
   162 ///   * second parent
   313 ///   * second parent
   163 ///   * ChangedFiles
   314 ///   * ChangedFiles
   164 /// isancestors(low_rev, high_rev): callback to check if a revision is an
   315 /// isancestors(low_rev, high_rev): callback to check if a revision is an
   165 ///                                 ancestor of another
   316 ///                                 ancestor of another
   166 pub fn combine_changeset_copies<A: Fn(Revision, Revision) -> bool>(
   317 pub fn combine_changeset_copies<A: Fn(Revision, Revision) -> bool, D>(
   167     revs: Vec<Revision>,
   318     revs: Vec<Revision>,
   168     children: HashMap<Revision, Vec<Revision>>,
   319     children: HashMap<Revision, Vec<Revision>>,
   169     target_rev: Revision,
   320     target_rev: Revision,
   170     rev_info: &impl Fn(Revision) -> RevInfo,
   321     rev_info: RevInfoMaker<D>,
   171     is_ancestor: &A,
   322     is_ancestor: &A,
   172 ) -> PathCopies {
   323 ) -> PathCopies {
   173     let mut all_copies = HashMap::new();
   324     let mut all_copies = HashMap::new();
   174     let mut oracle = AncestorOracle::new(is_ancestor);
   325     let mut oracle = AncestorOracle::new(is_ancestor);
   175 
   326 
   187         };
   338         };
   188 
   339 
   189         for child in current_children {
   340         for child in current_children {
   190             // We will chain the copies information accumulated for `rev` with
   341             // We will chain the copies information accumulated for `rev` with
   191             // the individual copies information for each of its children.
   342             // the individual copies information for each of its children.
   192             // Creating a new PathCopies for each `rev` ? `children` vertex.
   343             // Creating a new PathCopies for each `rev` → `children` vertex.
   193             let (p1, p2, changes) = rev_info(*child);
   344             let mut d: DataHolder<D> = DataHolder { data: None };
       
   345             let (p1, p2, changes) = rev_info(*child, &mut d);
   194 
   346 
   195             let parent = if rev == p1 {
   347             let parent = if rev == p1 {
   196                 1
   348                 1
   197             } else {
   349             } else {
   198                 assert_eq!(rev, p2);
   350                 assert_eq!(rev, p2);