rust-revlog: teach the revlog opening code to read the repo options
authorRaphaël Gomès <rgomes@octobus.net>
Mon, 18 Sep 2023 17:11:11 +0200
changeset 51191 13f58ce70299
parent 51190 6ec8387eb0be
child 51192 65c9032e2e5a
rust-revlog: teach the revlog opening code to read the repo options This will become necessary as we start writing revlog data from Rust.
mercurial/revlog.py
rust/hg-core/src/operations/debugdata.rs
rust/hg-core/src/repo.rs
rust/hg-core/src/requirements.rs
rust/hg-core/src/revlog/changelog.rs
rust/hg-core/src/revlog/filelog.rs
rust/hg-core/src/revlog/index.rs
rust/hg-core/src/revlog/manifest.rs
rust/hg-core/src/revlog/mod.rs
rust/hg-cpython/src/revlog.rs
rust/rhg/src/commands/status.rs
tests/test-rust-revlog.py
--- a/mercurial/revlog.py	Tue Jun 27 17:34:51 2023 +0200
+++ b/mercurial/revlog.py	Mon Sep 18 17:11:11 2023 +0200
@@ -16,6 +16,7 @@
 import binascii
 import collections
 import contextlib
+import functools
 import io
 import os
 import struct
@@ -224,9 +225,9 @@
     parse_index_v1_nodemap = None
 
 
-def parse_index_v1_mixed(data, inline):
+def parse_index_v1_mixed(data, inline, default_header):
     index, cache = parse_index_v1(data, inline)
-    return rustrevlog.MixedIndex(index, data), cache
+    return rustrevlog.MixedIndex(index, data, default_header), cache
 
 
 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
@@ -1694,7 +1695,9 @@
         elif devel_nodemap:
             self._parse_index = parse_index_v1_nodemap
         elif use_rust_index:
-            self._parse_index = parse_index_v1_mixed
+            self._parse_index = functools.partial(
+                parse_index_v1_mixed, default_header=new_header
+            )
         try:
             d = self._parse_index(index_data, self._inline)
             index, chunkcache = d
--- a/rust/hg-core/src/operations/debugdata.rs	Tue Jun 27 17:34:51 2023 +0200
+++ b/rust/hg-core/src/operations/debugdata.rs	Mon Sep 18 17:11:11 2023 +0200
@@ -6,11 +6,10 @@
 // GNU General Public License version 2 or any later version.
 
 use crate::repo::Repo;
-use crate::requirements;
 use crate::revlog::{Revlog, RevlogError};
 
 /// Kind of data to debug
-#[derive(Debug, Copy, Clone)]
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
 pub enum DebugDataKind {
     Changelog,
     Manifest,
@@ -26,11 +25,12 @@
         DebugDataKind::Changelog => "00changelog.i",
         DebugDataKind::Manifest => "00manifest.i",
     };
-    let use_nodemap = repo
-        .requirements()
-        .contains(requirements::NODEMAP_REQUIREMENT);
-    let revlog =
-        Revlog::open(&repo.store_vfs(), index_file, None, use_nodemap)?;
+    let revlog = Revlog::open(
+        &repo.store_vfs(),
+        index_file,
+        None,
+        repo.default_revlog_options(kind == DebugDataKind::Changelog)?,
+    )?;
     let rev =
         crate::revset::resolve_rev_number_or_hex_prefix(revset, &revlog)?;
     let data = revlog.get_rev_data_for_checked_rev(rev)?;
--- a/rust/hg-core/src/repo.rs	Tue Jun 27 17:34:51 2023 +0200
+++ b/rust/hg-core/src/repo.rs	Mon Sep 18 17:11:11 2023 +0200
@@ -8,6 +8,10 @@
 use crate::errors::{HgError, IoResultExt};
 use crate::lock::{try_with_lock_no_wait, LockError};
 use crate::manifest::{Manifest, Manifestlog};
+use crate::requirements::{
+    CHANGELOGV2_REQUIREMENT, GENERALDELTA_REQUIREMENT, NODEMAP_REQUIREMENT,
+    REVLOGV1_REQUIREMENT, REVLOGV2_REQUIREMENT,
+};
 use crate::revlog::filelog::Filelog;
 use crate::revlog::RevlogError;
 use crate::utils::debug::debug_wait_for_file_or_print;
@@ -15,8 +19,10 @@
 use crate::utils::hg_path::HgPath;
 use crate::utils::SliceExt;
 use crate::vfs::{is_dir, is_file, Vfs};
-use crate::DirstateError;
-use crate::{requirements, NodePrefix, UncheckedRevision};
+use crate::{
+    requirements, NodePrefix, RevlogVersionOptions, UncheckedRevision,
+};
+use crate::{DirstateError, RevlogOpenOptions};
 use std::cell::{Ref, RefCell, RefMut};
 use std::collections::HashSet;
 use std::io::Seek;
@@ -523,7 +529,7 @@
     }
 
     fn new_changelog(&self) -> Result<Changelog, HgError> {
-        Changelog::open(&self.store_vfs(), self.has_nodemap())
+        Changelog::open(&self.store_vfs(), self.default_revlog_options(true)?)
     }
 
     pub fn changelog(&self) -> Result<Ref<Changelog>, HgError> {
@@ -535,7 +541,10 @@
     }
 
     fn new_manifestlog(&self) -> Result<Manifestlog, HgError> {
-        Manifestlog::open(&self.store_vfs(), self.has_nodemap())
+        Manifestlog::open(
+            &self.store_vfs(),
+            self.default_revlog_options(false)?,
+        )
     }
 
     pub fn manifestlog(&self) -> Result<Ref<Manifestlog>, HgError> {
@@ -581,7 +590,7 @@
     }
 
     pub fn filelog(&self, path: &HgPath) -> Result<Filelog, HgError> {
-        Filelog::open(self, path)
+        Filelog::open(self, path, self.default_revlog_options(false)?)
     }
 
     /// Write to disk any updates that were made through `dirstate_map_mut`.
@@ -730,6 +739,35 @@
         }
         Ok(())
     }
+
+    pub fn default_revlog_options(
+        &self,
+        changelog: bool,
+    ) -> Result<RevlogOpenOptions, HgError> {
+        let requirements = self.requirements();
+        let version = if changelog
+            && requirements.contains(CHANGELOGV2_REQUIREMENT)
+        {
+            let compute_rank = self
+                .config()
+                .get_bool(b"experimental", b"changelog-v2.compute-rank")?;
+            RevlogVersionOptions::ChangelogV2 { compute_rank }
+        } else if requirements.contains(REVLOGV2_REQUIREMENT) {
+            RevlogVersionOptions::V2
+        } else if requirements.contains(REVLOGV1_REQUIREMENT) {
+            RevlogVersionOptions::V1 {
+                generaldelta: requirements.contains(GENERALDELTA_REQUIREMENT),
+            }
+        } else {
+            RevlogVersionOptions::V0
+        };
+        Ok(RevlogOpenOptions {
+            version,
+            // We don't need to dance around the slow path like in the Python
+            // implementation since we know we have access to the fast code.
+            use_nodemap: requirements.contains(NODEMAP_REQUIREMENT),
+        })
+    }
 }
 
 /// Lazily-initialized component of `Repo` with interior mutability
--- a/rust/hg-core/src/requirements.rs	Tue Jun 27 17:34:51 2023 +0200
+++ b/rust/hg-core/src/requirements.rs	Mon Sep 18 17:11:11 2023 +0200
@@ -77,7 +77,7 @@
 
 /// rhg supports repository with or without these
 const SUPPORTED: &[&str] = &[
-    "generaldelta",
+    GENERALDELTA_REQUIREMENT,
     SHARED_REQUIREMENT,
     SHARESAFE_REQUIREMENT,
     SPARSEREVLOG_REQUIREMENT,
@@ -100,6 +100,7 @@
 // Copied from mercurial/requirements.py:
 
 pub const DIRSTATE_V2_REQUIREMENT: &str = "dirstate-v2";
+pub const GENERALDELTA_REQUIREMENT: &str = "generaldelta";
 
 /// A repository that uses the tracked hint dirstate file
 #[allow(unused)]
@@ -128,11 +129,20 @@
 #[allow(unused)]
 pub const TREEMANIFEST_REQUIREMENT: &str = "treemanifest";
 
+/// Whether to use the "RevlogNG" or V1 of the revlog format
+#[allow(unused)]
+pub const REVLOGV1_REQUIREMENT: &str = "revlogv1";
+
 /// Increment the sub-version when the revlog v2 format changes to lock out old
 /// clients.
 #[allow(unused)]
 pub const REVLOGV2_REQUIREMENT: &str = "exp-revlogv2.1";
 
+/// Increment the sub-version when the revlog v2 format changes to lock out old
+/// clients.
+#[allow(unused)]
+pub const CHANGELOGV2_REQUIREMENT: &str = "exp-changelog-v2";
+
 /// A repository with the sparserevlog feature will have delta chains that
 /// can spread over a larger span. Sparse reading cuts these large spans into
 /// pieces, so that each piece isn't too big.
--- a/rust/hg-core/src/revlog/changelog.rs	Tue Jun 27 17:34:51 2023 +0200
+++ b/rust/hg-core/src/revlog/changelog.rs	Mon Sep 18 17:11:11 2023 +0200
@@ -4,7 +4,7 @@
 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
 use crate::utils::hg_path::HgPath;
 use crate::vfs::Vfs;
-use crate::{Graph, GraphError, UncheckedRevision};
+use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision};
 use itertools::Itertools;
 use std::ascii::escape_default;
 use std::borrow::Cow;
@@ -18,9 +18,11 @@
 
 impl Changelog {
     /// Open the `changelog` of a repository given by its root.
-    pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
-        let revlog =
-            Revlog::open(store_vfs, "00changelog.i", None, use_nodemap)?;
+    pub fn open(
+        store_vfs: &Vfs,
+        options: RevlogOpenOptions,
+    ) -> Result<Self, HgError> {
+        let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?;
         Ok(Self { revlog })
     }
 
@@ -342,7 +344,9 @@
         let temp = tempfile::tempdir().unwrap();
         let vfs = Vfs { base: temp.path() };
         std::fs::write(temp.path().join("foo.i"), b"").unwrap();
-        let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
+        let revlog =
+            Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::new())
+                .unwrap();
 
         let changelog = Changelog { revlog };
         assert_eq!(
--- a/rust/hg-core/src/revlog/filelog.rs	Tue Jun 27 17:34:51 2023 +0200
+++ b/rust/hg-core/src/revlog/filelog.rs	Mon Sep 18 17:11:11 2023 +0200
@@ -11,6 +11,7 @@
 use crate::utils::SliceExt;
 use crate::Graph;
 use crate::GraphError;
+use crate::RevlogOpenOptions;
 use crate::UncheckedRevision;
 use std::path::PathBuf;
 
@@ -30,16 +31,21 @@
     pub fn open_vfs(
         store_vfs: &crate::vfs::Vfs<'_>,
         file_path: &HgPath,
+        options: RevlogOpenOptions,
     ) -> Result<Self, HgError> {
         let index_path = store_path(file_path, b".i");
         let data_path = store_path(file_path, b".d");
         let revlog =
-            Revlog::open(store_vfs, index_path, Some(&data_path), false)?;
+            Revlog::open(store_vfs, index_path, Some(&data_path), options)?;
         Ok(Self { revlog })
     }
 
-    pub fn open(repo: &Repo, file_path: &HgPath) -> Result<Self, HgError> {
-        Self::open_vfs(&repo.store_vfs(), file_path)
+    pub fn open(
+        repo: &Repo,
+        file_path: &HgPath,
+        options: RevlogOpenOptions,
+    ) -> Result<Self, HgError> {
+        Self::open_vfs(&repo.store_vfs(), file_path, options)
     }
 
     /// The given node ID is that of the file as found in a filelog, not of a
--- a/rust/hg-core/src/revlog/index.rs	Tue Jun 27 17:34:51 2023 +0200
+++ b/rust/hg-core/src/revlog/index.rs	Mon Sep 18 17:11:11 2023 +0200
@@ -15,7 +15,7 @@
 pub const COMPRESSION_MODE_INLINE: u8 = 2;
 
 pub struct IndexHeader {
-    header_bytes: [u8; 4],
+    pub(super) header_bytes: [u8; 4],
 }
 
 #[derive(Copy, Clone)]
@@ -54,32 +54,22 @@
         BigEndian::read_u16(&self.header_bytes[2..4])
     }
 
-    const EMPTY_INDEX_HEADER: IndexHeader = IndexHeader {
-        // We treat an empty file as a valid index with no entries.
-        // Here we make an arbitrary choice of what we assume the format of the
-        // index to be (V1, using generaldelta).
-        // This doesn't matter too much, since we're only doing read-only
-        // access. but the value corresponds to the `new_header` variable in
-        // `revlog.py`, `_loadindex`
-        header_bytes: [0, 3, 0, 1],
-    };
-
-    fn parse(index_bytes: &[u8]) -> Result<IndexHeader, HgError> {
+    pub fn parse(index_bytes: &[u8]) -> Result<Option<IndexHeader>, HgError> {
         if index_bytes.is_empty() {
-            return Ok(IndexHeader::EMPTY_INDEX_HEADER);
+            return Ok(None);
         }
         if index_bytes.len() < 4 {
             return Err(HgError::corrupted(
                 "corrupted revlog: can't read the index format header",
             ));
         }
-        Ok(IndexHeader {
+        Ok(Some(IndexHeader {
             header_bytes: {
                 let bytes: [u8; 4] =
                     index_bytes[0..4].try_into().expect("impossible");
                 bytes
             },
-        })
+        }))
     }
 }
 
@@ -239,8 +229,10 @@
     /// Calculate the start of each entry when is_inline is true.
     pub fn new(
         bytes: Box<dyn Deref<Target = [u8]> + Send>,
+        default_header: IndexHeader,
     ) -> Result<Self, HgError> {
-        let header = IndexHeader::parse(bytes.as_ref())?;
+        let header =
+            IndexHeader::parse(bytes.as_ref())?.unwrap_or(default_header);
 
         if header.format_version() != IndexHeader::REVLOGV1 {
             // A proper new version should have had a repo/store
@@ -598,6 +590,7 @@
     pub fn is_inline(index_bytes: &[u8]) -> bool {
         IndexHeader::parse(index_bytes)
             .expect("too short")
+            .unwrap()
             .format_flags()
             .is_inline()
     }
@@ -605,6 +598,7 @@
     pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
         IndexHeader::parse(index_bytes)
             .expect("too short")
+            .unwrap()
             .format_flags()
             .uses_generaldelta()
     }
@@ -612,6 +606,7 @@
     pub fn get_version(index_bytes: &[u8]) -> u16 {
         IndexHeader::parse(index_bytes)
             .expect("too short")
+            .unwrap()
             .format_version()
     }
 
--- a/rust/hg-core/src/revlog/manifest.rs	Tue Jun 27 17:34:51 2023 +0200
+++ b/rust/hg-core/src/revlog/manifest.rs	Mon Sep 18 17:11:11 2023 +0200
@@ -4,12 +4,14 @@
 use crate::utils::hg_path::HgPath;
 use crate::utils::SliceExt;
 use crate::vfs::Vfs;
-use crate::{Graph, GraphError, Revision, UncheckedRevision};
+use crate::{
+    Graph, GraphError, Revision, RevlogOpenOptions, UncheckedRevision,
+};
 
 /// A specialized `Revlog` to work with `manifest` data format.
 pub struct Manifestlog {
     /// The generic `revlog` format.
-    revlog: Revlog,
+    pub(crate) revlog: Revlog,
 }
 
 impl Graph for Manifestlog {
@@ -20,9 +22,11 @@
 
 impl Manifestlog {
     /// Open the `manifest` of a repository given by its root.
-    pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
-        let revlog =
-            Revlog::open(store_vfs, "00manifest.i", None, use_nodemap)?;
+    pub fn open(
+        store_vfs: &Vfs,
+        options: RevlogOpenOptions,
+    ) -> Result<Self, HgError> {
+        let revlog = Revlog::open(store_vfs, "00manifest.i", None, options)?;
         Ok(Self { revlog })
     }
 
--- a/rust/hg-core/src/revlog/mod.rs	Tue Jun 27 17:34:51 2023 +0200
+++ b/rust/hg-core/src/revlog/mod.rs	Mon Sep 18 17:11:11 2023 +0200
@@ -225,6 +225,55 @@
     }
 }
 
+#[derive(Debug, Copy, Clone)]
+pub enum RevlogVersionOptions {
+    V0,
+    V1 { generaldelta: bool },
+    V2,
+    ChangelogV2 { compute_rank: bool },
+}
+
+/// Options to govern how a revlog should be opened, usually from the
+/// repository configuration or requirements.
+#[derive(Debug, Copy, Clone)]
+pub struct RevlogOpenOptions {
+    /// The revlog version, along with any option specific to this version
+    pub version: RevlogVersionOptions,
+    /// Whether the revlog uses a persistent nodemap.
+    pub use_nodemap: bool,
+    // TODO other non-header/version options,
+}
+
+impl RevlogOpenOptions {
+    pub fn new() -> Self {
+        Self {
+            version: RevlogVersionOptions::V1 { generaldelta: true },
+            use_nodemap: false,
+        }
+    }
+
+    fn default_index_header(&self) -> index::IndexHeader {
+        index::IndexHeader {
+            header_bytes: match self.version {
+                RevlogVersionOptions::V0 => [0, 0, 0, 0],
+                RevlogVersionOptions::V1 { generaldelta } => {
+                    [0, if generaldelta { 3 } else { 1 }, 0, 1]
+                }
+                RevlogVersionOptions::V2 => 0xDEADu32.to_be_bytes(),
+                RevlogVersionOptions::ChangelogV2 { compute_rank: _ } => {
+                    0xD34Du32.to_be_bytes()
+                }
+            },
+        }
+    }
+}
+
+impl Default for RevlogOpenOptions {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
 impl Revlog {
     /// Open a revlog index file.
     ///
@@ -234,24 +283,30 @@
         store_vfs: &Vfs,
         index_path: impl AsRef<Path>,
         data_path: Option<&Path>,
-        use_nodemap: bool,
+        options: RevlogOpenOptions,
     ) -> Result<Self, HgError> {
-        Self::open_gen(store_vfs, index_path, data_path, use_nodemap, None)
+        Self::open_gen(store_vfs, index_path, data_path, options, None)
     }
 
     fn open_gen(
         store_vfs: &Vfs,
         index_path: impl AsRef<Path>,
         data_path: Option<&Path>,
-        use_nodemap: bool,
+        options: RevlogOpenOptions,
         nodemap_for_test: Option<nodemap::NodeTree>,
     ) -> Result<Self, HgError> {
         let index_path = index_path.as_ref();
         let index = {
             match store_vfs.mmap_open_opt(index_path)? {
-                None => Index::new(Box::<Vec<_>>::default()),
+                None => Index::new(
+                    Box::<Vec<_>>::default(),
+                    options.default_index_header(),
+                ),
                 Some(index_mmap) => {
-                    let index = Index::new(Box::new(index_mmap))?;
+                    let index = Index::new(
+                        Box::new(index_mmap),
+                        options.default_index_header(),
+                    )?;
                     Ok(index)
                 }
             }
@@ -270,7 +325,7 @@
                 Some(Box::new(data_mmap))
             };
 
-        let nodemap = if index.is_inline() || !use_nodemap {
+        let nodemap = if index.is_inline() || !options.use_nodemap {
             None
         } else {
             NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
@@ -809,7 +864,9 @@
         let temp = tempfile::tempdir().unwrap();
         let vfs = Vfs { base: temp.path() };
         std::fs::write(temp.path().join("foo.i"), b"").unwrap();
-        let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
+        let revlog =
+            Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::new())
+                .unwrap();
         assert!(revlog.is_empty());
         assert_eq!(revlog.len(), 0);
         assert!(revlog.get_entry(0.into()).is_err());
@@ -855,7 +912,9 @@
             .flatten()
             .collect_vec();
         std::fs::write(temp.path().join("foo.i"), contents).unwrap();
-        let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
+        let revlog =
+            Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::new())
+                .unwrap();
 
         let entry0 = revlog.get_entry(0.into()).ok().unwrap();
         assert_eq!(entry0.revision(), Revision(0));
@@ -926,8 +985,14 @@
         idx.insert_node(Revision(0), node0).unwrap();
         idx.insert_node(Revision(1), node1).unwrap();
 
-        let revlog =
-            Revlog::open_gen(&vfs, "foo.i", None, true, Some(idx.nt)).unwrap();
+        let revlog = Revlog::open_gen(
+            &vfs,
+            "foo.i",
+            None,
+            RevlogOpenOptions::new(),
+            Some(idx.nt),
+        )
+        .unwrap();
 
         // accessing the data shows the corruption
         revlog.get_entry(0.into()).unwrap().data().unwrap_err();
--- a/rust/hg-cpython/src/revlog.rs	Tue Jun 27 17:34:51 2023 +0200
+++ b/rust/hg-cpython/src/revlog.rs	Mon Sep 18 17:11:11 2023 +0200
@@ -17,6 +17,7 @@
     PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
 };
 use hg::{
+    index::IndexHeader,
     nodemap::{Block, NodeMapError, NodeTree},
     revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex},
     BaseRevision, Revision, UncheckedRevision,
@@ -47,9 +48,10 @@
     def __new__(
         _cls,
         cindex: PyObject,
-        data: PyObject
+        data: PyObject,
+        default_header: u32,
     ) -> PyResult<MixedIndex> {
-        Self::new(py, cindex, data)
+        Self::new(py, cindex, data, default_header)
     }
 
     /// Compatibility layer used for Python consumers needing access to the C index
@@ -364,6 +366,7 @@
         py: Python,
         cindex: PyObject,
         data: PyObject,
+        header: u32,
     ) -> PyResult<MixedIndex> {
         // Safety: we keep the buffer around inside the class as `index_mmap`
         let (buf, bytes) = unsafe { mmap_keeparound(py, data)? };
@@ -371,7 +374,15 @@
         Self::create_instance(
             py,
             RefCell::new(cindex::Index::new(py, cindex)?),
-            RefCell::new(hg::index::Index::new(bytes).unwrap()),
+            RefCell::new(
+                hg::index::Index::new(
+                    bytes,
+                    IndexHeader::parse(&header.to_be_bytes())
+                        .expect("default header is broken")
+                        .unwrap(),
+                )
+                .unwrap(),
+            ),
             RefCell::new(None),
             RefCell::new(None),
             RefCell::new(None),
--- a/rust/rhg/src/commands/status.rs	Tue Jun 27 17:34:51 2023 +0200
+++ b/rust/rhg/src/commands/status.rs	Mon Sep 18 17:11:11 2023 +0200
@@ -28,12 +28,12 @@
     get_bytes_from_os_str, get_bytes_from_os_string, get_path_from_bytes,
 };
 use hg::utils::hg_path::{hg_path_to_path_buf, HgPath};
-use hg::DirstateStatus;
 use hg::PatternFileWarning;
 use hg::Revision;
 use hg::StatusError;
 use hg::StatusOptions;
 use hg::{self, narrow, sparse};
+use hg::{DirstateStatus, RevlogOpenOptions};
 use log::info;
 use rayon::prelude::*;
 use std::borrow::Cow;
@@ -383,6 +383,7 @@
             })?;
             let working_directory_vfs = repo.working_directory_vfs();
             let store_vfs = repo.store_vfs();
+            let revlog_open_options = repo.default_revlog_options(false)?;
             let res: Vec<_> = take(&mut ds_status.unsure)
                 .into_par_iter()
                 .map(|to_check| {
@@ -396,6 +397,7 @@
                         check_exec,
                         &manifest,
                         &to_check.path,
+                        revlog_open_options,
                     ) {
                         Err(HgError::IoError { .. }) => {
                             // IO errors most likely stem from the file being
@@ -747,6 +749,7 @@
     check_exec: bool,
     manifest: &Manifest,
     hg_path: &HgPath,
+    revlog_open_options: RevlogOpenOptions,
 ) -> Result<UnsureOutcome, HgError> {
     let vfs = working_directory_vfs;
     let fs_path = hg_path_to_path_buf(hg_path).expect("HgPath conversion");
@@ -778,7 +781,11 @@
     if entry_flags != fs_flags {
         return Ok(UnsureOutcome::Modified);
     }
-    let filelog = hg::filelog::Filelog::open_vfs(&store_vfs, hg_path)?;
+    let filelog = hg::filelog::Filelog::open_vfs(
+        &store_vfs,
+        hg_path,
+        revlog_open_options,
+    )?;
     let fs_len = fs_metadata.len();
     let file_node = entry.node_id()?;
     let filelog_entry = filelog.entry_for_node(file_node).map_err(|_| {
--- a/tests/test-rust-revlog.py	Tue Jun 27 17:34:51 2023 +0200
+++ b/tests/test-rust-revlog.py	Mon Sep 18 17:11:11 2023 +0200
@@ -1,3 +1,4 @@
+import struct
 import unittest
 
 try:
@@ -14,6 +15,8 @@
 
 from mercurial.testing import revlog as revlogtesting
 
+header = struct.unpack(">I", revlogtesting.data_non_inlined[:4])[0]
+
 
 @unittest.skipIf(
     rustext is None,
@@ -22,24 +25,24 @@
 class RustRevlogIndexTest(revlogtesting.RevlogBasedTestBase):
     def test_heads(self):
         idx = self.parseindex()
-        rustidx = revlog.MixedIndex(idx, revlogtesting.data_non_inlined)
+        rustidx = revlog.MixedIndex(idx, revlogtesting.data_non_inlined, header)
         self.assertEqual(rustidx.headrevs(), idx.headrevs())
 
     def test_get_cindex(self):
         # drop me once we no longer need the method for shortest node
         idx = self.parseindex()
-        rustidx = revlog.MixedIndex(idx, revlogtesting.data_non_inlined)
+        rustidx = revlog.MixedIndex(idx, revlogtesting.data_non_inlined, header)
         cidx = rustidx.get_cindex()
         self.assertTrue(idx is cidx)
 
     def test_len(self):
         idx = self.parseindex()
-        rustidx = revlog.MixedIndex(idx, revlogtesting.data_non_inlined)
+        rustidx = revlog.MixedIndex(idx, revlogtesting.data_non_inlined, header)
         self.assertEqual(len(rustidx), len(idx))
 
     def test_ancestors(self):
         idx = self.parseindex()
-        rustidx = revlog.MixedIndex(idx, revlogtesting.data_non_inlined)
+        rustidx = revlog.MixedIndex(idx, revlogtesting.data_non_inlined, header)
         lazy = LazyAncestors(rustidx, [3], 0, True)
         # we have two more references to the index:
         # - in its inner iterator for __contains__ and __bool__