rhg: handle null changelog and manifest revisions
author Arseniy Alekseyev <aalekseyev@janestreet.com>
Tue, 12 Oct 2021 19:43:51 +0100
changeset 48198 61ce70fd420e
parent 48197 63e86fc9bfec
child 48199 9d0e5629cfbf
rhg: handle null changelog and manifest revisions

Differential Revision: https://phab.mercurial-scm.org/D11650
rust/hg-core/src/revlog/changelog.rs
rust/hg-core/src/revlog/index.rs
rust/hg-core/src/revlog/revlog.rs
tests/test-empty-manifest-index.t
--- a/rust/hg-core/src/revlog/changelog.rs	Tue Oct 12 15:43:45 2021 +0200
+++ b/rust/hg-core/src/revlog/changelog.rs	Tue Oct 12 19:43:51 2021 +0100
@@ -1,5 +1,6 @@
 use crate::errors::HgError;
 use crate::repo::Repo;
+use crate::revlog::node::NULL_NODE;
 use crate::revlog::revlog::{Revlog, RevlogError};
 use crate::revlog::Revision;
 use crate::revlog::{Node, NodePrefix};
@@ -58,10 +59,9 @@
     /// Return the node id of the `manifest` referenced by this `changelog`
     /// entry.
     pub fn manifest_node(&self) -> Result<Node, HgError> {
-        Node::from_hex_for_repo(
-            self.lines()
-                .next()
-                .ok_or_else(|| HgError::corrupted("empty changelog entry"))?,
-        )
+        match self.lines().next() {
+            None => Ok(NULL_NODE),
+            Some(x) => Node::from_hex_for_repo(x),
+        }
     }
 }
--- a/rust/hg-core/src/revlog/index.rs	Tue Oct 12 15:43:45 2021 +0200
+++ b/rust/hg-core/src/revlog/index.rs	Tue Oct 12 19:43:51 2021 +0100
@@ -208,6 +208,9 @@
 
 /// Value of the inline flag.
 pub fn is_inline(index_bytes: &[u8]) -> bool {
+    if index_bytes.len() < 4 {
+        return true;
+    }
     match &index_bytes[0..=1] {
         [0, 0] | [0, 2] => false,
         _ => true,
--- a/rust/hg-core/src/revlog/revlog.rs	Tue Oct 12 15:43:45 2021 +0200
+++ b/rust/hg-core/src/revlog/revlog.rs	Tue Oct 12 19:43:51 2021 +0100
@@ -72,7 +72,7 @@
         let index_path = index_path.as_ref();
         let index_mmap = repo.store_vfs().mmap_open(&index_path)?;
 
-        let version = get_version(&index_mmap);
+        let version = get_version(&index_mmap)?;
         if version != 1 {
             // A proper new version should have had a repo/store requirement.
             return Err(HgError::corrupted("corrupted revlog"));
@@ -179,6 +179,9 @@
     /// snapshot to rebuild the final data.
     #[timed]
     pub fn get_rev_data(&self, rev: Revision) -> Result<Vec<u8>, RevlogError> {
+        if rev == NULL_REVISION {
+            return Ok(vec![]);
+        };
         // Todo return -> Cow
         let mut entry = self.get_entry(rev)?;
         let mut delta_chain = vec![];
@@ -371,8 +374,16 @@
 }
 
 /// Format version of the revlog.
-pub fn get_version(index_bytes: &[u8]) -> u16 {
-    BigEndian::read_u16(&index_bytes[2..=3])
+pub fn get_version(index_bytes: &[u8]) -> Result<u16, HgError> {
+    if index_bytes.len() == 0 {
+        return Ok(1);
+    };
+    if index_bytes.len() < 4 {
+        return Err(HgError::corrupted(
+            "corrupted revlog: can't read the index format header",
+        ));
+    };
+    Ok(BigEndian::read_u16(&index_bytes[2..=3]))
 }
 
 /// Calculate the hash of a revision given its data and its parents.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-empty-manifest-index.t	Tue Oct 12 19:43:51 2021 +0100
@@ -0,0 +1,23 @@
+Create a repo such that the changelog entry refers to a null manifest node:
+
+  $ hg init a
+  $ cd a
+  $ hg log
+  $ touch x
+  $ hg add x
+  $ hg commit -m "init"
+  $ hg rm x
+  $ hg commit -q --amend
+
+  $ wc -c < .hg/store/00manifest.i
+  0
+
+Make sure that the manifest can be read (and is empty):
+
+  $ hg --config rhg.on-unsupported=abort files -r .
+  [1]
+
+Test a null changelog rev, too:
+
+  $ hg --config rhg.on-unsupported=abort files -r 0000000000000000000000000000000000000000
+  [1]