branching: merge stable into default
author Raphaël Gomès <rgomes@octobus.net>
Mon, 25 Apr 2022 11:09:33 +0200
changeset 49147 10b9f11daf15
parent 49144 f45e1618cbf6 (current diff)
parent 49146 802e2c25dab8 (diff)
child 49164 a932cad26d37
branching: merge stable into default. This also added the small fix needed in the Rust tests for the new `DirstateMap::pack_v2` API change in stable.
rust/hg-core/src/dirstate_tree/dirstate_map.rs
rust/hg-core/src/dirstate_tree/on_disk.rs
rust/hg-core/src/repo.rs
rust/hg-cpython/src/dirstate/dirstate_map.rs
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Tue Apr 19 12:17:23 2022 -0700
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Mon Apr 25 11:09:33 2022 +0200
@@ -50,6 +50,10 @@
 
     /// How many bytes of `on_disk` are not used anymore
     pub(super) unreachable_bytes: u32,
+
+    /// Size of the data used to first load this `DirstateMap`. Used in case
+    /// we need to write some new metadata, but no new data on disk.
+    pub(super) old_data_size: usize,
 }
 
 /// Using a plain `HgPathBuf` of the full path from the repository root as a
@@ -436,6 +440,7 @@
             nodes_with_copy_source_count: 0,
             ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN],
             unreachable_bytes: 0,
+            old_data_size: 0,
         }
     }
 
@@ -1232,12 +1237,13 @@
     /// Returns new data and metadata together with whether that data should be
     /// appended to the existing data file whose content is at
     /// `map.on_disk` (true), instead of written to a new data file
-    /// (false).
+    /// (false), and the previous size of data on disk.
     #[timed]
     pub fn pack_v2(
         &self,
         can_append: bool,
-    ) -> Result<(Vec<u8>, on_disk::TreeMetadata, bool), DirstateError> {
+    ) -> Result<(Vec<u8>, on_disk::TreeMetadata, bool, usize), DirstateError>
+    {
         let map = self.get_map();
         on_disk::write(map, can_append)
     }
@@ -1795,7 +1801,8 @@
             None,
         )?;
 
-        let (packed, metadata, _should_append) = map.pack_v2(false)?;
+        let (packed, metadata, _should_append, _old_data_size) =
+            map.pack_v2(false)?;
         let packed_len = packed.len();
         assert!(packed_len > 0);
 
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs	Tue Apr 19 12:17:23 2022 -0700
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs	Mon Apr 25 11:09:33 2022 +0200
@@ -290,6 +290,7 @@
         nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
         ignore_patterns_hash: meta.ignore_patterns_hash,
         unreachable_bytes: meta.unreachable_bytes.get(),
+        old_data_size: on_disk.len(),
     };
     Ok(dirstate_map)
 }
@@ -601,11 +602,11 @@
 /// Returns new data and metadata, together with whether that data should be
 /// appended to the existing data file whose content is at
 /// `dirstate_map.on_disk` (true), instead of written to a new data file
-/// (false).
+/// (false), and the previous size of data on disk.
 pub(super) fn write(
     dirstate_map: &DirstateMap,
     can_append: bool,
-) -> Result<(Vec<u8>, TreeMetadata, bool), DirstateError> {
+) -> Result<(Vec<u8>, TreeMetadata, bool, usize), DirstateError> {
     let append = can_append && dirstate_map.write_should_append();
 
     // This ignores the space for paths, and for nodes without an entry.
@@ -631,7 +632,7 @@
         unused: [0; 4],
         ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
     };
-    Ok((writer.out, meta, append))
+    Ok((writer.out, meta, append, dirstate_map.old_data_size))
 }
 
 struct Writer<'dmap, 'on_disk> {
--- a/rust/hg-core/src/repo.rs	Tue Apr 19 12:17:23 2022 -0700
+++ b/rust/hg-core/src/repo.rs	Mon Apr 25 11:09:33 2022 +0200
@@ -423,22 +423,25 @@
         // TODO: Maintain a `DirstateMap::dirty` flag, and return early here if
         // it’s unset
         let parents = self.dirstate_parents()?;
-        let packed_dirstate = if self.has_dirstate_v2() {
+        let (packed_dirstate, old_uuid_to_remove) = if self.has_dirstate_v2() {
             let uuid = self.dirstate_data_file_uuid.get_or_init(self)?;
             let mut uuid = uuid.as_ref();
             let can_append = uuid.is_some();
-            let (data, tree_metadata, append) = map.pack_v2(can_append)?;
+            let (data, tree_metadata, append, old_data_size) =
+                map.pack_v2(can_append)?;
             if !append {
                 uuid = None
             }
-            let uuid = if let Some(uuid) = uuid {
-                std::str::from_utf8(uuid)
+            let (uuid, old_uuid) = if let Some(uuid) = uuid {
+                let as_str = std::str::from_utf8(uuid)
                     .map_err(|_| {
                         HgError::corrupted("non-UTF-8 dirstate data file ID")
                     })?
-                    .to_owned()
+                    .to_owned();
+                let old_uuid_to_remove = Some(as_str.to_owned());
+                (as_str, old_uuid_to_remove)
             } else {
-                DirstateDocket::new_uid()
+                (DirstateDocket::new_uid(), None)
             };
             let data_filename = format!("dirstate.{}", uuid);
             let data_filename = self.hg_vfs().join(data_filename);
@@ -453,13 +456,23 @@
                 // returns `ErrorKind::AlreadyExists`? Collision chance of two
                 // random IDs is one in 2**32
                 let mut file = options.open(&data_filename)?;
-                file.write_all(&data)?;
-                file.flush()?;
-                // TODO: use https://doc.rust-lang.org/std/io/trait.Seek.html#method.stream_position when we require Rust 1.51+
-                file.seek(SeekFrom::Current(0))
+                if data.is_empty() {
+                    // If we're not appending anything, the data size is the
+                    // same as in the previous docket. It is *not* the file
+                    // length, since it could have garbage at the end.
+                    // We don't have to worry about it when we do have data
+                    // to append since we rewrite the root node in this case.
+                    Ok(old_data_size as u64)
+                } else {
+                    file.write_all(&data)?;
+                    file.flush()?;
+                    // TODO: use https://doc.rust-lang.org/std/io/trait.Seek.html#method.stream_position when we require Rust 1.51+
+                    file.seek(SeekFrom::Current(0))
+                }
             })()
             .when_writing_file(&data_filename)?;
-            DirstateDocket::serialize(
+
+            let packed_dirstate = DirstateDocket::serialize(
                 parents,
                 tree_metadata,
                 data_size,
@@ -467,11 +480,20 @@
             )
             .map_err(|_: std::num::TryFromIntError| {
                 HgError::corrupted("overflow in dirstate docket serialization")
-            })?
+            })?;
+
+            (packed_dirstate, old_uuid)
         } else {
-            map.pack_v1(parents)?
+            (map.pack_v1(parents)?, None)
         };
-        self.hg_vfs().atomic_write("dirstate", &packed_dirstate)?;
+
+        let vfs = self.hg_vfs();
+        vfs.atomic_write("dirstate", &packed_dirstate)?;
+        if let Some(uuid) = old_uuid_to_remove {
+            // Remove the old data file after the new docket pointing to the
+            // new data file was written.
+            vfs.remove_file(format!("dirstate.{}", uuid))?;
+        }
         Ok(())
     }
 }
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs	Tue Apr 19 12:17:23 2022 -0700
+++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs	Mon Apr 25 11:09:33 2022 +0200
@@ -252,7 +252,7 @@
         let inner = self.inner(py).borrow();
         let result = inner.pack_v2(can_append);
         match result {
-            Ok((packed, tree_metadata, append)) => {
+            Ok((packed, tree_metadata, append, _old_data_size)) => {
                 let packed = PyBytes::new(py, &packed);
                 let tree_metadata = PyBytes::new(py, tree_metadata.as_bytes());
                 let tuple = (packed, tree_metadata, append);