hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2)
authorAntoine cezar<acezar@chwitlabs.fr>
Mon, 28 Sep 2020 15:13:51 +0200
changeset 45601 900b9b79b99c
parent 45600 b68b19104d16
child 45602 1cef583541c0
hg-core: make `Index` owner of its bytes (D8958#inline-14994 followup 1/2) Prevent building `Index` every time it is needed. It was a bad idea anyway. When `Index::new` will return `Result` it will avoid things like `Revlog::len` returning `Result<usize>` instead of `usize`. [X] make `Index` owner of its bytes [ ] make `Index::new` return an error if `offset != bytes.len()` Differential Revision: https://phab.mercurial-scm.org/D9106
rust/hg-core/src/revlog/index.rs
rust/hg-core/src/revlog/revlog.rs
--- a/rust/hg-core/src/revlog/index.rs	Mon Sep 28 14:33:52 2020 +0200
+++ b/rust/hg-core/src/revlog/index.rs	Mon Sep 28 15:13:51 2020 +0200
@@ -1,3 +1,5 @@
+use std::ops::Deref;
+
 use byteorder::{BigEndian, ByteOrder};
 
 use crate::revlog::{Revision, NULL_REVISION};
@@ -5,19 +7,18 @@
 pub const INDEX_ENTRY_SIZE: usize = 64;
 
 /// A Revlog index
-#[derive(Debug)]
-pub struct Index<'a> {
-    bytes: &'a [u8],
+pub struct Index {
+    bytes: Box<dyn Deref<Target = [u8]> + Send>,
     /// Offsets of starts of index blocks.
     /// Only needed when the index is interleaved with data.
     offsets: Option<Vec<usize>>,
 }
 
-impl<'a> Index<'a> {
+impl Index {
     /// Create an index from bytes.
     /// Calculate the start of each entry when is_inline is true.
-    pub fn new(bytes: &'a [u8], is_inline: bool) -> Self {
-        if is_inline {
+    pub fn new(bytes: Box<dyn Deref<Target = [u8]> + Send>) -> Self {
+        if is_inline(&bytes) {
             let mut offset: usize = 0;
             let mut offsets = Vec::new();
 
@@ -44,6 +45,19 @@
         }
     }
 
+    /// Value of the inline flag.
+    pub fn is_inline(&self) -> bool {
+        is_inline(&self.bytes)
+    }
+
+    /// Return a slice of bytes if `revlog` is inline. Panic if not.
+    pub fn data(&self, start: usize, end: usize) -> &[u8] {
+        if !self.is_inline() {
+            panic!("tried to access data in the index of a revlog that is not inline");
+        }
+        &self.bytes[start..end]
+    }
+
     /// Return number of entries of the revlog index.
     pub fn len(&self) -> usize {
         if let Some(offsets) = &self.offsets {
@@ -172,6 +186,14 @@
     }
 }
 
+/// Value of the inline flag.
+pub fn is_inline(index_bytes: &[u8]) -> bool {
+    match &index_bytes[0..=1] {
+        [0, 0] | [0, 2] => false,
+        _ => true,
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -269,6 +291,39 @@
     }
 
     #[test]
+    fn is_not_inline_when_no_inline_flag_test() {
+        let bytes = IndexEntryBuilder::new()
+            .is_first(true)
+            .with_general_delta(false)
+            .with_inline(false)
+            .build();
+
+        assert_eq!(is_inline(&bytes), false)
+    }
+
+    #[test]
+    fn is_inline_when_inline_flag_test() {
+        let bytes = IndexEntryBuilder::new()
+            .is_first(true)
+            .with_general_delta(false)
+            .with_inline(true)
+            .build();
+
+        assert_eq!(is_inline(&bytes), true)
+    }
+
+    #[test]
+    fn is_inline_when_inline_and_generaldelta_flags_test() {
+        let bytes = IndexEntryBuilder::new()
+            .is_first(true)
+            .with_general_delta(true)
+            .with_inline(true)
+            .build();
+
+        assert_eq!(is_inline(&bytes), true)
+    }
+
+    #[test]
     fn test_offset() {
         let bytes = IndexEntryBuilder::new().with_offset(1).build();
         let entry = IndexEntry {
--- a/rust/hg-core/src/revlog/revlog.rs	Mon Sep 28 14:33:52 2020 +0200
+++ b/rust/hg-core/src/revlog/revlog.rs	Mon Sep 28 15:13:51 2020 +0200
@@ -36,7 +36,7 @@
     /// When index and data are not interleaved: bytes of the revlog index.
     /// When index and data are interleaved: bytes of the revlog index and
     /// data.
-    index_bytes: Box<dyn Deref<Target = [u8]> + Send>,
+    index: Index,
     /// When index and data are not interleaved: bytes of the revlog data
     data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
 }
@@ -56,15 +56,13 @@
             return Err(RevlogError::UnsuportedVersion(version));
         }
 
-        let is_inline = is_inline(&index_mmap);
-
-        let index_bytes = Box::new(index_mmap);
+        let index = Index::new(Box::new(index_mmap));
 
         // TODO load data only when needed //
         // type annotation required
         // won't recognize Mmap as Deref<Target = [u8]>
         let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
-            if is_inline {
+            if index.is_inline() {
                 None
             } else {
                 let data_path = index_path.with_extension("d");
@@ -73,31 +71,27 @@
                 Some(Box::new(data_mmap))
             };
 
-        Ok(Revlog {
-            index_bytes,
-            data_bytes,
-        })
+        Ok(Revlog { index, data_bytes })
     }
 
     /// Return number of entries of the `Revlog`.
     pub fn len(&self) -> usize {
-        self.index().len()
+        self.index.len()
     }
 
     /// Returns `true` if the `Revlog` has zero `entries`.
     pub fn is_empty(&self) -> bool {
-        self.index().is_empty()
+        self.index.is_empty()
     }
 
     /// Return the full data associated to a node.
     #[timed]
     pub fn get_node_rev(&self, node: &[u8]) -> Result<Revision, RevlogError> {
-        let index = self.index();
         // This is brute force. But it is fast enough for now.
         // Optimization will come later.
         for rev in (0..self.len() as Revision).rev() {
             let index_entry =
-                index.get_entry(rev).ok_or(RevlogError::Corrupted)?;
+                self.index.get_entry(rev).ok_or(RevlogError::Corrupted)?;
             if node == index_entry.hash() {
                 return Ok(rev);
             }
@@ -123,9 +117,10 @@
         }
 
         // TODO do not look twice in the index
-        let index = self.index();
-        let index_entry =
-            index.get_entry(rev).ok_or(RevlogError::InvalidRevision)?;
+        let index_entry = self
+            .index
+            .get_entry(rev)
+            .ok_or(RevlogError::InvalidRevision)?;
 
         let data: Vec<u8> = if delta_chain.is_empty() {
             entry.data()?.into()
@@ -153,13 +148,12 @@
         expected: &[u8],
         data: &[u8],
     ) -> bool {
-        let index = self.index();
-        let e1 = index.get_entry(p1);
+        let e1 = self.index.get_entry(p1);
         let h1 = match e1 {
             Some(ref entry) => entry.hash(),
             None => &NULL_NODE_ID,
         };
-        let e2 = index.get_entry(p2);
+        let e2 = self.index.get_entry(p2);
         let h2 = match e2 {
             Some(ref entry) => entry.hash(),
             None => &NULL_NODE_ID,
@@ -187,30 +181,32 @@
         Ok(patch.apply(&snapshot))
     }
 
-    /// Return the revlog index.
-    pub fn index(&self) -> Index {
-        let is_inline = self.data_bytes.is_none();
-        Index::new(&self.index_bytes, is_inline)
-    }
-
     /// Return the revlog data.
     fn data(&self) -> &[u8] {
         match self.data_bytes {
             Some(ref data_bytes) => &data_bytes,
-            None => &self.index_bytes,
+            None => panic!(
+                "forgot to load the data or trying to access inline data"
+            ),
         }
     }
 
     /// Get an entry of the revlog.
     fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> {
-        let index = self.index();
-        let index_entry =
-            index.get_entry(rev).ok_or(RevlogError::InvalidRevision)?;
+        let index_entry = self
+            .index
+            .get_entry(rev)
+            .ok_or(RevlogError::InvalidRevision)?;
         let start = index_entry.offset();
         let end = start + index_entry.compressed_len();
+        let data = if self.index.is_inline() {
+            self.index.data(start, end)
+        } else {
+            &self.data()[start..end]
+        };
         let entry = RevlogEntry {
             rev,
-            bytes: &self.data()[start..end],
+            bytes: data,
             compressed_len: index_entry.compressed_len(),
             uncompressed_len: index_entry.uncompressed_len(),
             base_rev: if index_entry.base_revision() == rev {
@@ -296,14 +292,6 @@
     }
 }
 
-/// Value of the inline flag.
-pub fn is_inline(index_bytes: &[u8]) -> bool {
-    match &index_bytes[0..=1] {
-        [0, 0] | [0, 2] => false,
-        _ => true,
-    }
-}
-
 /// Format version of the revlog.
 pub fn get_version(index_bytes: &[u8]) -> u16 {
     BigEndian::read_u16(&index_bytes[2..=3])
@@ -332,113 +320,12 @@
 
     use super::super::index::IndexEntryBuilder;
 
-    #[cfg(test)]
-    pub struct RevlogBuilder {
-        version: u16,
-        is_general_delta: bool,
-        is_inline: bool,
-        offset: usize,
-        index: Vec<Vec<u8>>,
-        data: Vec<Vec<u8>>,
-    }
-
-    #[cfg(test)]
-    impl RevlogBuilder {
-        pub fn new() -> Self {
-            Self {
-                version: 2,
-                is_inline: false,
-                is_general_delta: true,
-                offset: 0,
-                index: vec![],
-                data: vec![],
-            }
-        }
-
-        pub fn with_inline(&mut self, value: bool) -> &mut Self {
-            self.is_inline = value;
-            self
-        }
-
-        pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
-            self.is_general_delta = value;
-            self
-        }
-
-        pub fn with_version(&mut self, value: u16) -> &mut Self {
-            self.version = value;
-            self
-        }
-
-        pub fn push(
-            &mut self,
-            mut index: IndexEntryBuilder,
-            data: Vec<u8>,
-        ) -> &mut Self {
-            if self.index.is_empty() {
-                index.is_first(true);
-                index.with_general_delta(self.is_general_delta);
-                index.with_inline(self.is_inline);
-                index.with_version(self.version);
-            } else {
-                index.with_offset(self.offset);
-            }
-            self.index.push(index.build());
-            self.offset += data.len();
-            self.data.push(data);
-            self
-        }
-
-        pub fn build_inline(&self) -> Vec<u8> {
-            let mut bytes =
-                Vec::with_capacity(self.index.len() + self.data.len());
-            for (index, data) in self.index.iter().zip(self.data.iter()) {
-                bytes.extend(index);
-                bytes.extend(data);
-            }
-            bytes
-        }
-    }
-
-    #[test]
-    fn is_not_inline_when_no_inline_flag_test() {
-        let bytes = RevlogBuilder::new()
-            .with_general_delta(false)
-            .with_inline(false)
-            .push(IndexEntryBuilder::new(), vec![])
-            .build_inline();
-
-        assert_eq!(is_inline(&bytes), false)
-    }
-
-    #[test]
-    fn is_inline_when_inline_flag_test() {
-        let bytes = RevlogBuilder::new()
-            .with_general_delta(false)
-            .with_inline(true)
-            .push(IndexEntryBuilder::new(), vec![])
-            .build_inline();
-
-        assert_eq!(is_inline(&bytes), true)
-    }
-
-    #[test]
-    fn is_inline_when_inline_and_generaldelta_flags_test() {
-        let bytes = RevlogBuilder::new()
-            .with_general_delta(true)
-            .with_inline(true)
-            .push(IndexEntryBuilder::new(), vec![])
-            .build_inline();
-
-        assert_eq!(is_inline(&bytes), true)
-    }
-
     #[test]
     fn version_test() {
-        let bytes = RevlogBuilder::new()
+        let bytes = IndexEntryBuilder::new()
+            .is_first(true)
             .with_version(1)
-            .push(IndexEntryBuilder::new(), vec![])
-            .build_inline();
+            .build();
 
         assert_eq!(get_version(&bytes), 1)
     }