rust: mostly avoid streaming zstd decompression
author Arseniy Alekseyev <aalekseyev@janestreet.com>
Thu, 18 May 2023 17:53:17 +0100
changeset 50508 39ed7b2953bb
parent 50507 d1cab48354bc
child 50509 1b73868d17cf
rust: mostly avoid streaming zstd decompression

Streaming zstd decompression seems slightly slower, and the API we use makes it very inconvenient to reuse the decompression context. Instead of using that, use the buffer-backed version, because we can give a reasonable-ish size estimate.
rust/hg-core/src/revlog/mod.rs
--- a/rust/hg-core/src/revlog/mod.rs	Thu May 18 17:25:18 2023 +0100
+++ b/rust/hg-core/src/revlog/mod.rs	Thu May 18 17:53:17 2023 +0100
@@ -596,13 +596,26 @@
     }
 
     fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
+        let cap = self.uncompressed_len.max(0) as usize;
         if self.is_delta() {
-            let mut buf = Vec::with_capacity(self.compressed_len as usize);
-            zstd::stream::copy_decode(self.bytes, &mut buf)
-                .map_err(|e| corrupted(e.to_string()))?;
+            // [cap] is usually an over-estimate of the space needed because
+            // it's the length of delta-decoded data, but we're interested
+            // in the size of the delta.
+            // This means we have to [shrink_to_fit] to avoid holding on
+            // to a large chunk of memory, but it also means we must have a
+            // fallback branch, for the case when the delta is longer than
+            // the original data (surprisingly, this does happen in practice)
+            let mut buf = Vec::with_capacity(cap);
+            match zstd_decompress_to_buffer(self.bytes, &mut buf) {
+                Ok(_) => buf.shrink_to_fit(),
+                Err(_) => {
+                    buf.clear();
+                    zstd::stream::copy_decode(self.bytes, &mut buf)
+                        .map_err(|e| corrupted(e.to_string()))?;
+                }
+            };
             Ok(buf)
         } else {
-            let cap = self.uncompressed_len.max(0) as usize;
             let mut buf = Vec::with_capacity(cap);
             let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
                 .map_err(|e| corrupted(e.to_string()))?;