dirstate-v2: Allow tree nodes without an entry to store a timestamp
authorSimon Sapin <simon.sapin@octobus.net>
Thu, 27 May 2021 18:40:54 +0200
changeset 47348 a4de570e61fa
parent 47347 73ddcedeaadf
child 47349 7138c863d0a1
dirstate-v2: Allow tree nodes without an entry to store a timestamp Timestamps are stored on 96 bits: * 64 bits for the signed number of seconds since the Unix epoch * 32 bits for the nanoseconds in the `0 <= ns < 1_000_000_000` range For now timestamps are not used or set yet. Differential Revision: https://phab.mercurial-scm.org/D10825
rust/hg-core/src/dirstate_tree/dirstate_map.rs
rust/hg-core/src/dirstate_tree/on_disk.rs
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Fri May 28 20:07:27 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Thu May 27 18:40:54 2021 +0200
@@ -295,18 +295,13 @@
         }
     }
 
-    pub(super) fn has_entry(&self) -> bool {
-        match self {
-            NodeRef::InMemory(_path, node) => node.entry.is_some(),
-            NodeRef::OnDisk(node) => node.has_entry(),
-        }
-    }
-
     pub(super) fn entry(
         &self,
     ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
         match self {
-            NodeRef::InMemory(_path, node) => Ok(node.entry),
+            NodeRef::InMemory(_path, node) => {
+                Ok(node.data.as_entry().copied())
+            }
             NodeRef::OnDisk(node) => node.entry(),
         }
     }
@@ -316,7 +311,7 @@
     ) -> Result<Option<EntryState>, DirstateV2ParseError> {
         match self {
             NodeRef::InMemory(_path, node) => {
-                Ok(node.entry.as_ref().map(|entry| entry.state))
+                Ok(node.data.as_entry().map(|entry| entry.state))
             }
             NodeRef::OnDisk(node) => node.state(),
         }
@@ -333,8 +328,7 @@
 /// Represents a file or a directory
 #[derive(Default)]
 pub(super) struct Node<'on_disk> {
-    /// `None` for directories
-    pub(super) entry: Option<DirstateEntry>,
+    pub(super) data: NodeData,
 
     pub(super) copy_source: Option<Cow<'on_disk, HgPath>>,
 
@@ -344,6 +338,34 @@
     pub(super) tracked_descendants_count: u32,
 }
 
+pub(super) enum NodeData {
+    Entry(DirstateEntry),
+    CachedDirectory { mtime: on_disk::Timestamp },
+    None,
+}
+
+impl Default for NodeData {
+    fn default() -> Self {
+        NodeData::None
+    }
+}
+
+impl NodeData {
+    fn has_entry(&self) -> bool {
+        match self {
+            NodeData::Entry(_) => true,
+            _ => false,
+        }
+    }
+
+    fn as_entry(&self) -> Option<&DirstateEntry> {
+        match self {
+            NodeData::Entry(entry) => Some(entry),
+            _ => None,
+        }
+    }
+}
+
 impl<'on_disk> DirstateMap<'on_disk> {
     pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
         Self {
@@ -386,14 +408,14 @@
                     },
                 )?;
                 assert!(
-                    node.entry.is_none(),
+                    !node.data.has_entry(),
                     "duplicate dirstate entry in read"
                 );
                 assert!(
                     node.copy_source.is_none(),
                     "duplicate dirstate entry in read"
                 );
-                node.entry = Some(*entry);
+                node.data = NodeData::Entry(*entry);
                 node.copy_source = copy_source.map(Cow::Borrowed);
                 map.nodes_with_entry_count += 1;
                 if copy_source.is_some() {
@@ -519,10 +541,10 @@
                 }
             },
         )?;
-        if node.entry.is_none() {
+        if !node.data.has_entry() {
             self.nodes_with_entry_count += 1
         }
-        node.entry = Some(new_entry);
+        node.data = NodeData::Entry(new_entry);
         Ok(())
     }
 
@@ -587,7 +609,7 @@
                 &mut self.root,
                 path.as_ref(),
             )? {
-                if let Some(entry) = node.entry.as_mut() {
+                if let NodeData::Entry(entry) = &mut node.data {
                     entry.clear_mtime();
                 }
             }
@@ -703,18 +725,22 @@
                     return Ok(None);
                 }
             } else {
+                let had_entry = node.data.has_entry();
+                if had_entry {
+                    node.data = NodeData::None
+                }
                 dropped = Dropped {
                     was_tracked: node
-                        .entry
-                        .as_ref()
+                        .data
+                        .as_entry()
                         .map_or(false, |entry| entry.state.is_tracked()),
-                    had_entry: node.entry.take().is_some(),
+                    had_entry,
                     had_copy_source: node.copy_source.take().is_some(),
                 };
             }
             // After recursion, for both leaf (rest_of_path is None) nodes and
             // parent nodes, remove a node if it just became empty.
-            if node.entry.is_none()
+            if !node.data.has_entry()
                 && node.copy_source.is_none()
                 && node.children.is_empty()
             {
@@ -746,7 +772,7 @@
             if let Some(node) =
                 Self::get_node_mut(self.on_disk, &mut self.root, &filename)?
             {
-                if let Some(entry) = node.entry.as_mut() {
+                if let NodeData::Entry(entry) = &mut node.data {
                     entry.clear_ambiguous_mtime(now);
                 }
             }
@@ -815,7 +841,8 @@
         if let Some(node) = self.get_node(directory)? {
             // A node without a `DirstateEntry` was created to hold child
             // nodes, and is therefore a directory.
-            Ok(!node.has_entry() && node.tracked_descendants_count() > 0)
+            let state = node.state()?;
+            Ok(state.is_none() && node.tracked_descendants_count() > 0)
         } else {
             Ok(false)
         }
@@ -825,7 +852,7 @@
         if let Some(node) = self.get_node(directory)? {
             // A node without a `DirstateEntry` was created to hold child
             // nodes, and is therefore a directory.
-            Ok(!node.has_entry())
+            Ok(node.state()?.is_none())
         } else {
             Ok(false)
         }
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs	Fri May 28 20:07:27 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs	Thu May 27 18:40:54 2021 +0200
@@ -17,10 +17,11 @@
 use crate::DirstateError;
 use crate::DirstateParents;
 use crate::EntryState;
-use bytes_cast::unaligned::{I32Be, U32Be, U64Be};
+use bytes_cast::unaligned::{I32Be, I64Be, U32Be, U64Be};
 use bytes_cast::BytesCast;
 use std::borrow::Cow;
-use std::convert::{TryFrom, TryInto};
+use std::convert::TryFrom;
+use std::time::{Duration, SystemTime, UNIX_EPOCH};
 
 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
 /// This a redundant sanity check more than an actual "magic number" since
@@ -50,22 +51,43 @@
     base_name_start: Size,
 
     copy_source: OptPathSlice,
-    entry: OptEntry,
     children: ChildNodes,
     pub(super) tracked_descendants_count: Size,
+
+    /// Dependending on the value of `state`:
+    ///
+    /// * A null byte: `data` represents nothing
+    /// * A `n`, `a`, `r`, or `m` ASCII byte: `state` and `data` together
+    ///   represents a dirstate entry like in the v1 format.
+    /// * A `d` ASCII byte: the bytes of `data` should instead be interpreted
+    ///   as the `Timestamp` for the mtime of a cached directory.
+    ///
+    /// TODO: document directory caching
+    state: u8,
+    data: Entry,
 }
 
-/// Either nothing if `state == b'\0'`, or a dirstate entry like in the v1
-/// format
 #[derive(BytesCast, Copy, Clone)]
 #[repr(C)]
-struct OptEntry {
-    state: u8,
+struct Entry {
     mode: I32Be,
     mtime: I32Be,
     size: I32Be,
 }
 
+/// Duration since the Unix epoch
+#[derive(BytesCast, Copy, Clone)]
+#[repr(C)]
+pub(super) struct Timestamp {
+    seconds: I64Be,
+
+    /// In `0 .. 1_000_000_000`.
+    ///
+    /// This timestamp is later or earlier than `(seconds, 0)` by this many
+    /// nanoseconds, if `seconds` is non-negative or negative, respectively.
+    nanoseconds: U32Be,
+}
+
 /// Counted in bytes from the start of the file
 ///
 /// NOTE: If we decide to never support `.hg/dirstate` files larger than 4 GiB
@@ -216,34 +238,54 @@
         })
     }
 
-    pub(super) fn has_entry(&self) -> bool {
-        self.entry.state != b'\0'
+    pub(super) fn node_data(
+        &self,
+    ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
+        let entry = |state| {
+            dirstate_map::NodeData::Entry(self.entry_with_given_state(state))
+        };
+
+        match self.state {
+            b'\0' => Ok(dirstate_map::NodeData::None),
+            b'd' => Ok(dirstate_map::NodeData::CachedDirectory {
+                mtime: *self.data.as_timestamp(),
+            }),
+            b'n' => Ok(entry(EntryState::Normal)),
+            b'a' => Ok(entry(EntryState::Added)),
+            b'r' => Ok(entry(EntryState::Removed)),
+            b'm' => Ok(entry(EntryState::Merged)),
+            _ => Err(DirstateV2ParseError),
+        }
     }
 
     pub(super) fn state(
         &self,
     ) -> Result<Option<EntryState>, DirstateV2ParseError> {
-        Ok(if self.has_entry() {
-            Some(
-                self.entry
-                    .state
-                    .try_into()
-                    .map_err(|_| DirstateV2ParseError)?,
-            )
-        } else {
-            None
-        })
+        match self.state {
+            b'\0' | b'd' => Ok(None),
+            b'n' => Ok(Some(EntryState::Normal)),
+            b'a' => Ok(Some(EntryState::Added)),
+            b'r' => Ok(Some(EntryState::Removed)),
+            b'm' => Ok(Some(EntryState::Merged)),
+            _ => Err(DirstateV2ParseError),
+        }
+    }
+
+    fn entry_with_given_state(&self, state: EntryState) -> DirstateEntry {
+        DirstateEntry {
+            state,
+            mode: self.data.mode.get(),
+            mtime: self.data.mtime.get(),
+            size: self.data.size.get(),
+        }
     }
 
     pub(super) fn entry(
         &self,
     ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
-        Ok(self.state()?.map(|state| DirstateEntry {
-            state,
-            mode: self.entry.mode.get(),
-            mtime: self.entry.mtime.get(),
-            size: self.entry.size.get(),
-        }))
+        Ok(self
+            .state()?
+            .map(|state| self.entry_with_given_state(state)))
     }
 
     pub(super) fn children<'on_disk>(
@@ -262,12 +304,58 @@
                 self.children(on_disk)?,
             ),
             copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
-            entry: self.entry()?,
+            data: self.node_data()?,
             tracked_descendants_count: self.tracked_descendants_count.get(),
         })
     }
 }
 
+impl Entry {
+    fn from_timestamp(timestamp: Timestamp) -> Self {
+        // Safety: both types implement the `ByteCast` trait, so we could
+        // safely use `as_bytes` and `from_bytes` to do this conversion. Using
+        // `transmute` instead makes the compiler check that the two types
+        // have the same size, which eliminates the error case of
+        // `from_bytes`.
+        unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) }
+    }
+
+    fn as_timestamp(&self) -> &Timestamp {
+        // Safety: same as above in `from_timestamp`
+        unsafe { &*(self as *const Entry as *const Timestamp) }
+    }
+}
+
+impl From<&'_ SystemTime> for Timestamp {
+    fn from(system_time: &'_ SystemTime) -> Self {
+        let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) {
+            Ok(duration) => {
+                (duration.as_secs() as i64, duration.subsec_nanos())
+            }
+            Err(error) => {
+                let negative = error.duration();
+                (-(negative.as_secs() as i64), negative.subsec_nanos())
+            }
+        };
+        Timestamp {
+            seconds: secs.into(),
+            nanoseconds: nanos.into(),
+        }
+    }
+}
+
+impl From<&'_ Timestamp> for SystemTime {
+    fn from(timestamp: &'_ Timestamp) -> Self {
+        let secs = timestamp.seconds.get();
+        let nanos = timestamp.nanoseconds.get();
+        if secs >= 0 {
+            UNIX_EPOCH + Duration::new(secs as u64, nanos)
+        } else {
+            UNIX_EPOCH - Duration::new((-secs) as u64, nanos)
+        }
+    }
+}
+
 fn read_hg_path(
     on_disk: &[u8],
     slice: Slice,
@@ -356,33 +444,43 @@
                 }
             };
         on_disk_nodes.push(match node {
-            NodeRef::InMemory(path, node) => Node {
-                children,
-                copy_source,
-                full_path,
-                base_name_start: u32::try_from(path.base_name_start())
-                    // Could only panic for paths over 4 GiB
-                    .expect("dirstate-v2 offset overflow")
-                    .into(),
-                tracked_descendants_count: node
-                    .tracked_descendants_count
-                    .into(),
-                entry: if let Some(entry) = &node.entry {
-                    OptEntry {
-                        state: entry.state.into(),
-                        mode: entry.mode.into(),
-                        mtime: entry.mtime.into(),
-                        size: entry.size.into(),
+            NodeRef::InMemory(path, node) => {
+                let (state, data) = match &node.data {
+                    dirstate_map::NodeData::Entry(entry) => (
+                        entry.state.into(),
+                        Entry {
+                            mode: entry.mode.into(),
+                            mtime: entry.mtime.into(),
+                            size: entry.size.into(),
+                        },
+                    ),
+                    dirstate_map::NodeData::CachedDirectory { mtime } => {
+                        (b'd', Entry::from_timestamp(*mtime))
                     }
-                } else {
-                    OptEntry {
-                        state: b'\0',
-                        mode: 0.into(),
-                        mtime: 0.into(),
-                        size: 0.into(),
-                    }
-                },
-            },
+                    dirstate_map::NodeData::None => (
+                        b'\0',
+                        Entry {
+                            mode: 0.into(),
+                            mtime: 0.into(),
+                            size: 0.into(),
+                        },
+                    ),
+                };
+                Node {
+                    children,
+                    copy_source,
+                    full_path,
+                    base_name_start: u32::try_from(path.base_name_start())
+                        // Could only panic for paths over 4 GiB
+                        .expect("dirstate-v2 offset overflow")
+                        .into(),
+                    tracked_descendants_count: node
+                        .tracked_descendants_count
+                        .into(),
+                    state,
+                    data,
+                }
+            }
             NodeRef::OnDisk(node) => Node {
                 children,
                 copy_source,