repair: improve performance of detection of revisions affected by issue6528 stable
authorRaphaël Gomès <rgomes@octobus.net>
Thu, 05 Aug 2021 17:00:03 +0200
branchstable
changeset 47816 32e21ac3adb1
parent 47815 b30a53ffbf9b
child 47817 855463b5fe49
repair: improve performance of detection of revisions affected by issue6528 Explanations inside the patch. I've tested this on Mozilla-Central and it's 5 times faster than the naive approach on my laptop. Differential Revision: https://phab.mercurial-scm.org/D11262
mercurial/revlogutils/rewrite.py
--- a/mercurial/revlogutils/rewrite.py	Tue Jul 27 21:45:27 2021 +0200
+++ b/mercurial/revlogutils/rewrite.py	Thu Aug 05 17:00:03 2021 +0200
@@ -10,6 +10,7 @@
 import binascii
 import contextlib
 import os
+import struct
 
 from ..node import (
     nullrev,
@@ -561,7 +562,7 @@
             util.tryunlink(new_file_path)
 
 
-def _is_revision_affected(ui, fl, filerev, path):
+def _is_revision_affected(fl, filerev, metadata_cache=None):
     """Mercurial currently (5.9rc0) uses `p1 == nullrev and p2 != nullrev` as a
     special meaning compared to the reverse in the context of filelog-based
     copytracing. issue6528 exists because new code assumed that parent ordering
@@ -574,6 +575,8 @@
         # We don't care about censored nodes as they never carry metadata
         return False
     has_meta = raw_text.startswith(b'\x01\n')
+    if metadata_cache is not None:
+        metadata_cache[filerev] = has_meta
     if has_meta:
         (p1, p2) = fl.parentrevs(filerev)
         if p1 != nullrev and p2 == nullrev:
@@ -581,6 +584,54 @@
     return False
 
 
+def _is_revision_affected_fast(repo, fl, filerev, metadata_cache):
+    """Optimization fast-path for `_is_revision_affected`.
+
+    `metadata_cache` is a dict of `{rev: has_metadata}` which allows any
+    revision to check if its base has metadata, saving computation of the full
+    text, instead looking at the current delta.
+
+    This optimization only works if the revisions are looked at in order."""
+    rl = fl._revlog
+
+    if rl.iscensored(filerev):
+        # Censored revisions don't contain metadata, so they cannot be affected
+        metadata_cache[filerev] = False
+        return False
+
+    p1, p2 = rl.parentrevs(filerev)
+    if p1 == nullrev or p2 != nullrev:
+        return False
+
+    delta_parent = rl.deltaparent(filerev)
+    parent_has_metadata = metadata_cache.get(delta_parent)
+    if parent_has_metadata is None:
+        is_affected = _is_revision_affected(fl, filerev, metadata_cache)
+        return is_affected
+
+    chunk = rl._chunk(filerev)
+    if not len(chunk):
+        # No diff for this revision
+        return parent_has_metadata
+
+    header_length = 12
+    if len(chunk) < header_length:
+        raise error.Abort(_(b"patch cannot be decoded"))
+
+    start, _end, _length = struct.unpack(b">lll", chunk[:header_length])
+
+    if start < 2:  # len(b'\x01\n') == 2
+        # This delta does *something* to the metadata marker (if any).
+        # Check it the slow way
+        is_affected = _is_revision_affected(fl, filerev, metadata_cache)
+        return is_affected
+
+    # The diff did not remove or add the metadata header, it's then in the same
+    # situation as its parent
+    metadata_cache[filerev] = parent_has_metadata
+    return parent_has_metadata
+
+
 def _from_report(ui, repo, context, from_report, dry_run):
     """
     Fix the revisions given in the `from_report` file, but still checks if the
@@ -603,7 +654,7 @@
             excluded = set()
 
             for filerev in to_fix:
-                if _is_revision_affected(ui, fl, filerev, filename):
+                if _is_revision_affected(fl, filerev):
                     msg = b"found affected revision %d for filelog '%s'\n"
                     ui.warn(msg % (filerev, filename))
                 else:
@@ -663,11 +714,11 @@
 
             # Set of filerevs (or hex filenodes if `to_report`) that need fixing
             to_fix = set()
+            metadata_cache = {}
             for filerev in fl.revs():
-                # TODO speed up by looking at the start of the delta
-                # If it hasn't changed, it's not worth looking at the other revs
-                # in the same chain
-                affected = _is_revision_affected(ui, fl, filerev, path)
+                affected = _is_revision_affected_fast(
+                    repo, fl, filerev, metadata_cache
+                )
                 if affected:
                     msg = b"found affected revision %d for filelog '%s'\n"
                     ui.warn(msg % (filerev, path))