dirstate-v2: hash the source of the ignore patterns as well [stable]
author Raphaël Gomès <rgomes@octobus.net>
Wed, 02 Nov 2022 12:05:34 +0100
branch stable
changeset 49558 363923bd51cd
parent 49557 ca19335e86e5
child 49559 7787174f0a5a
dirstate-v2: hash the source of the ignore patterns as well

Fixes the test introduced in the last changeset. This caused the hash to change, which means that the check in the test had to be adapted. Since this hash is only done as a caching mechanism, invalidation does not pose any backwards compatibility issues.
mercurial/helptext/internals/dirstate-v2.txt
rust/hg-core/src/dirstate_tree/status.rs
rust/hg-core/src/filepatterns.rs
rust/hg-core/src/matchers.rs
rust/rhg/src/commands/debugignorerhg.rs
tests/test-hgignore.t
--- a/mercurial/helptext/internals/dirstate-v2.txt	Wed Nov 02 15:24:39 2022 +0100
+++ b/mercurial/helptext/internals/dirstate-v2.txt	Wed Nov 02 12:05:34 2022 +0100
@@ -283,8 +283,16 @@
   in inclusion order. This definition is recursive, as included files can
   themselves include more files.
 
-This hash is defined as the SHA-1 of the concatenation (in sorted
-order) of the "expanded contents" of each "root" ignore file.
+* "filepath" as the bytes of the ignore file path
+  relative to the root of the repository if inside the repository,
+  or the untouched path as defined in the configuration.
+
+This hash is defined as the SHA-1 of the following line format:
+
+<filepath> <sha1 of the "expanded contents">\n
+
+for each "root" ignore file. (in sorted order)
+
 (Note that computing this does not require actually concatenating
 into a single contiguous byte sequence.
 Instead a SHA-1 hasher object can be created
--- a/rust/hg-core/src/dirstate_tree/status.rs	Wed Nov 02 15:24:39 2022 +0100
+++ b/rust/hg-core/src/dirstate_tree/status.rs	Wed Nov 02 12:05:34 2022 +0100
@@ -10,6 +10,7 @@
 use crate::matchers::get_ignore_function;
 use crate::matchers::Matcher;
 use crate::utils::files::get_bytes_from_os_string;
+use crate::utils::files::get_bytes_from_path;
 use crate::utils::files::get_path_from_bytes;
 use crate::utils::hg_path::HgPath;
 use crate::BadMatch;
@@ -66,7 +67,7 @@
                     let (ignore_fn, warnings) = get_ignore_function(
                         ignore_files,
                         &root_dir,
-                        &mut |_pattern_bytes| {},
+                        &mut |_source, _pattern_bytes| {},
                     )?;
                     (ignore_fn, warnings, None)
                 }
@@ -75,7 +76,24 @@
                     let (ignore_fn, warnings) = get_ignore_function(
                         ignore_files,
                         &root_dir,
-                        &mut |pattern_bytes| hasher.update(pattern_bytes),
+                        &mut |source, pattern_bytes| {
+                            // If inside the repo, use the relative version to
+                            // make it deterministic inside tests.
+                            // The performance hit should be negligible.
+                            let source = source
+                                .strip_prefix(&root_dir)
+                                .unwrap_or(source);
+                            let source = get_bytes_from_path(source);
+
+                            let mut subhasher = Sha1::new();
+                            subhasher.update(pattern_bytes);
+                            let patterns_hash = subhasher.finalize();
+
+                            hasher.update(source);
+                            hasher.update(b" ");
+                            hasher.update(patterns_hash);
+                            hasher.update(b"\n");
+                        },
                     )?;
                     let new_hash = *hasher.finalize().as_ref();
                     let changed = new_hash != dmap.ignore_patterns_hash;
--- a/rust/hg-core/src/filepatterns.rs	Wed Nov 02 15:24:39 2022 +0100
+++ b/rust/hg-core/src/filepatterns.rs	Wed Nov 02 12:05:34 2022 +0100
@@ -412,11 +412,11 @@
 pub fn read_pattern_file(
     file_path: &Path,
     warn: bool,
-    inspect_pattern_bytes: &mut impl FnMut(&[u8]),
+    inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
 ) -> Result<(Vec<IgnorePattern>, Vec<PatternFileWarning>), PatternError> {
     match std::fs::read(file_path) {
         Ok(contents) => {
-            inspect_pattern_bytes(&contents);
+            inspect_pattern_bytes(file_path, &contents);
             parse_pattern_file_contents(&contents, file_path, None, warn)
         }
         Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok((
@@ -455,7 +455,7 @@
 pub fn get_patterns_from_file(
     pattern_file: &Path,
     root_dir: &Path,
-    inspect_pattern_bytes: &mut impl FnMut(&[u8]),
+    inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
 ) -> PatternResult<(Vec<IgnorePattern>, Vec<PatternFileWarning>)> {
     let (patterns, mut warnings) =
         read_pattern_file(pattern_file, true, inspect_pattern_bytes)?;
--- a/rust/hg-core/src/matchers.rs	Wed Nov 02 15:24:39 2022 +0100
+++ b/rust/hg-core/src/matchers.rs	Wed Nov 02 12:05:34 2022 +0100
@@ -838,7 +838,7 @@
 pub fn get_ignore_matcher<'a>(
     mut all_pattern_files: Vec<PathBuf>,
     root_dir: &Path,
-    inspect_pattern_bytes: &mut impl FnMut(&[u8]),
+    inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
 ) -> PatternResult<(IncludeMatcher<'a>, Vec<PatternFileWarning>)> {
     let mut all_patterns = vec![];
     let mut all_warnings = vec![];
@@ -871,7 +871,7 @@
 pub fn get_ignore_function<'a>(
     all_pattern_files: Vec<PathBuf>,
     root_dir: &Path,
-    inspect_pattern_bytes: &mut impl FnMut(&[u8]),
+    inspect_pattern_bytes: &mut impl FnMut(&Path, &[u8]),
 ) -> PatternResult<(IgnoreFnType<'a>, Vec<PatternFileWarning>)> {
     let res =
         get_ignore_matcher(all_pattern_files, root_dir, inspect_pattern_bytes);
--- a/rust/rhg/src/commands/debugignorerhg.rs	Wed Nov 02 15:24:39 2022 +0100
+++ b/rust/rhg/src/commands/debugignorerhg.rs	Wed Nov 02 12:05:34 2022 +0100
@@ -25,7 +25,7 @@
     let (ignore_matcher, warnings) = get_ignore_matcher(
         vec![ignore_file],
         &repo.working_directory_path().to_owned(),
-        &mut |_pattern_bytes| (),
+        &mut |_source, _pattern_bytes| (),
     )
     .map_err(|e| StatusError::from(e))?;
 
--- a/tests/test-hgignore.t	Wed Nov 02 15:24:39 2022 +0100
+++ b/tests/test-hgignore.t	Wed Nov 02 12:05:34 2022 +0100
@@ -421,18 +421,24 @@
 Check the hash of ignore patterns written in the dirstate
 This is an optimization that is only relevant when using the Rust extensions
 
+  $ cat_filename_and_hash () {
+  >     for i in "$@"; do
+  >         printf "$i "
+  >         cat "$i" | "$TESTDIR"/f --raw-sha1 | sed 's/^raw-sha1=//'
+  >     done
+  > }
   $ hg status > /dev/null
-  $ cat .hg/testhgignore .hg/testhgignorerel .hgignore dir2/.hgignore dir1/.hgignore dir1/.hgignoretwo | $TESTDIR/f --sha1
-  sha1=6e315b60f15fb5dfa02be00f3e2c8f923051f5ff
+  $ cat_filename_and_hash .hg/testhgignore .hg/testhgignorerel .hgignore dir2/.hgignore dir1/.hgignore dir1/.hgignoretwo | $TESTDIR/f --sha1
+  sha1=c0beb296395d48ced8e14f39009c4ea6e409bfe6
   $ hg debugstate --docket | grep ignore
-  ignore pattern hash: 6e315b60f15fb5dfa02be00f3e2c8f923051f5ff
+  ignore pattern hash: c0beb296395d48ced8e14f39009c4ea6e409bfe6
 
   $ echo rel > .hg/testhgignorerel
   $ hg status > /dev/null
-  $ cat .hg/testhgignore .hg/testhgignorerel .hgignore dir2/.hgignore dir1/.hgignore dir1/.hgignoretwo | $TESTDIR/f --sha1
-  sha1=dea19cc7119213f24b6b582a4bae7b0cb063e34e
+  $ cat_filename_and_hash .hg/testhgignore .hg/testhgignorerel .hgignore dir2/.hgignore dir1/.hgignore dir1/.hgignoretwo | $TESTDIR/f --sha1
+  sha1=b8e63d3428ec38abc68baa27631516d5ec46b7fa
   $ hg debugstate --docket | grep ignore
-  ignore pattern hash: dea19cc7119213f24b6b582a4bae7b0cb063e34e
+  ignore pattern hash: b8e63d3428ec38abc68baa27631516d5ec46b7fa
   $ cd ..
 
 Check that the hash depends on the source of the hgignore patterns
@@ -460,6 +466,6 @@
   $ hg status
   M dir1/.hgignore
   M dir2/.hgignore
-  ? dir1/subdir/ignored1 (missing-correct-output !)
+  ? dir1/subdir/ignored1
 
 #endif