deltas: add a debug-delta-find command to analyse delta search
authorPierre-Yves David <pierre-yves.david@octobus.net>
Fri, 20 May 2022 14:27:46 +0200
changeset 49228 b909dd35d9ab
parent 49227 2bcf5e14bb7e
child 49229 ed9170ff791a
deltas: add a debug-delta-find command to analyse delta search See command documentation for details. For some reason, pytype is confused by our usage of None/deltainfo variable, so I had to quiet it.
mercurial/debugcommands.py
mercurial/revlogutils/deltas.py
tests/test-completion.t
tests/test-help.t
tests/test-sparse-revlog.t
--- a/mercurial/debugcommands.py	Thu May 19 23:39:42 2022 +0100
+++ b/mercurial/debugcommands.py	Fri May 20 14:27:46 2022 +0200
@@ -73,6 +73,7 @@
     repoview,
     requirements,
     revlog,
+    revlogutils,
     revset,
     revsetlang,
     scmutil,
@@ -989,6 +990,65 @@
 
 
 @command(
+    b'debug-delta-find',
+    cmdutil.debugrevlogopts + cmdutil.formatteropts,
+    _(b'-c|-m|FILE REV'),
+    optionalrepo=True,
+)
+def debugdeltafind(ui, repo, arg_1, arg_2=None, **opts):
+    """display the computation to get to a valid delta for storing REV
+
+    This command will replay the process used to find the "best" delta to store
+    a revision and display information about all the steps used to get to that
+    result.
+
+    The revision use the revision number of the target storage (not changelog
+    revision number).
+
+    note: the process is initiated from a full text of the revision to store.
+    """
+    opts = pycompat.byteskwargs(opts)
+    if arg_2 is None:
+        file_ = None
+        rev = arg_1
+    else:
+        file_ = arg_1
+        rev = arg_2
+
+    rev = int(rev)
+
+    revlog = cmdutil.openrevlog(repo, b'debugdeltachain', file_, opts)
+
+    deltacomputer = deltautil.deltacomputer(
+        revlog,
+        write_debug=ui.write,
+        debug_search=True,
+    )
+
+    node = revlog.node(rev)
+    p1r, p2r = revlog.parentrevs(rev)
+    p1 = revlog.node(p1r)
+    p2 = revlog.node(p2r)
+    btext = [revlog.revision(rev)]
+    textlen = len(btext[0])
+    cachedelta = None
+    flags = revlog.flags(rev)
+
+    revinfo = revlogutils.revisioninfo(
+        node,
+        p1,
+        p2,
+        btext,
+        textlen,
+        cachedelta,
+        flags,
+    )
+
+    fh = revlog._datafp()
+    deltacomputer.finddeltainfo(revinfo, fh, target_rev=rev)
+
+
+@command(
     b'debugdirstate|debugstate',
     [
         (
--- a/mercurial/revlogutils/deltas.py	Thu May 19 23:39:42 2022 +0100
+++ b/mercurial/revlogutils/deltas.py	Fri May 20 14:27:46 2022 +0200
@@ -931,9 +931,10 @@
 
 
 class deltacomputer:
-    def __init__(self, revlog, write_debug=None):
+    def __init__(self, revlog, write_debug=None, debug_search=False):
         self.revlog = revlog
         self._write_debug = write_debug
+        self._debug_search = debug_search
 
     def buildtext(self, revinfo, fh):
         """Builds a fulltext version of a revision
@@ -980,6 +981,7 @@
     def _builddeltainfo(self, revinfo, base, fh):
         # can we use the cached delta?
         revlog = self.revlog
+        debug_search = self._write_debug is not None and self._debug_search
         chainbase = revlog.chainbase(base)
         if revlog._generaldelta:
             deltabase = base
@@ -1009,13 +1011,27 @@
                 delta = revinfo.cachedelta[1]
         if delta is None:
             delta = self._builddeltadiff(base, revinfo, fh)
+        if debug_search:
+            msg = b"DBG-DELTAS-SEARCH:     uncompressed-delta-size=%d\n"
+            msg %= len(delta)
+            self._write_debug(msg)
         # snapshotdept need to be neither None nor 0 level snapshot
         if revlog.upperboundcomp is not None and snapshotdepth:
             lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
             snapshotlimit = revinfo.textlen >> snapshotdepth
+            if debug_search:
+                msg = b"DBG-DELTAS-SEARCH:     projected-lower-size=%d\n"
+                msg %= lowestrealisticdeltalen
+                self._write_debug(msg)
             if snapshotlimit < lowestrealisticdeltalen:
+                if debug_search:
+                    msg = b"DBG-DELTAS-SEARCH:     DISCARDED (snapshot limit)\n"
+                    self._write_debug(msg)
                 return None
             if revlog.length(base) < lowestrealisticdeltalen:
+                if debug_search:
+                    msg = b"DBG-DELTAS-SEARCH:     DISCARDED (prev size)\n"
+                    self._write_debug(msg)
                 return None
         header, data = revlog.compress(delta)
         deltalen = len(header) + len(data)
@@ -1090,6 +1106,8 @@
         if self._write_debug is not None:
             start = util.timer()
 
+        debug_search = self._write_debug is not None and self._debug_search
+
         # count the number of different delta we tried (for debug purpose)
         dbg_try_count = 0
         # count the number of "search round" we did. (for debug purpose)
@@ -1113,6 +1131,10 @@
                 p2_chain_len = revlog._chaininfo(p2r)[0]
             else:
                 p2_chain_len = -1
+        if debug_search:
+            msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
+            msg %= target_rev
+            self._write_debug(msg)
 
         groups = _candidategroups(
             self.revlog, revinfo.textlen, p1r, p2r, cachedelta
@@ -1120,21 +1142,93 @@
         candidaterevs = next(groups)
         while candidaterevs is not None:
             dbg_try_rounds += 1
+            if debug_search:
+                prev = None
+                if deltainfo is not None:
+                    prev = deltainfo.base
+
+                if p1 in candidaterevs or p2 in candidaterevs:
+                    round_type = b"parents"
+                elif prev is not None and all(c < prev for c in candidaterevs):
+                    round_type = b"refine-down"
+                elif prev is not None and all(c > prev for c in candidaterevs):
+                    round_type = b"refine-up"
+                else:
+                    round_type = b"search-down"
+                msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
+                msg %= (dbg_try_rounds, len(candidaterevs), round_type)
+                self._write_debug(msg)
             nominateddeltas = []
             if deltainfo is not None:
+                if debug_search:
+                    msg = (
+                        b"DBG-DELTAS-SEARCH:   CONTENDER: rev=%d - length=%d\n"
+                    )
+                    msg %= (deltainfo.base, deltainfo.deltalen)
+                    self._write_debug(msg)
                 # if we already found a good delta,
                 # challenge it against refined candidates
                 nominateddeltas.append(deltainfo)
             for candidaterev in candidaterevs:
+                if debug_search:
+                    msg = b"DBG-DELTAS-SEARCH:   CANDIDATE: rev=%d\n"
+                    msg %= candidaterev
+                    self._write_debug(msg)
+                    candidate_type = None
+                    if candidaterev == p1:
+                        candidate_type = b"p1"
+                    elif candidaterev == p2:
+                        candidate_type = b"p2"
+                    elif self.revlog.issnapshot(candidaterev):
+                        candidate_type = b"snapshot-%d"
+                        candidate_type %= self.revlog.snapshotdepth(
+                            candidaterev
+                        )
+
+                    if candidate_type is not None:
+                        msg = b"DBG-DELTAS-SEARCH:     type=%s\n"
+                        msg %= candidate_type
+                        self._write_debug(msg)
+                    msg = b"DBG-DELTAS-SEARCH:     size=%d\n"
+                    msg %= self.revlog.length(candidaterev)
+                    self._write_debug(msg)
+                    msg = b"DBG-DELTAS-SEARCH:     base=%d\n"
+                    msg %= self.revlog.deltaparent(candidaterev)
+                    self._write_debug(msg)
                 if candidaterev in excluded_bases:
+                    if debug_search:
+                        msg = b"DBG-DELTAS-SEARCH:     EXCLUDED\n"
+                        self._write_debug(msg)
                     continue
                 if candidaterev >= target_rev:
+                    if debug_search:
+                        msg = b"DBG-DELTAS-SEARCH:     TOO-HIGH\n"
+                        self._write_debug(msg)
                     continue
                 dbg_try_count += 1
+
+                if debug_search:
+                    delta_start = util.timer()
                 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
+                if debug_search:
+                    delta_end = util.timer()
+                    msg = b"DBG-DELTAS-SEARCH:     delta-search-time=%f\n"
+                    msg %= delta_end - delta_start
+                    self._write_debug(msg)
                 if candidatedelta is not None:
                     if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
+                        if debug_search:
+                            msg = b"DBG-DELTAS-SEARCH:     DELTA: length=%d (GOOD)\n"
+                            msg %= candidatedelta.deltalen
+                            self._write_debug(msg)
                         nominateddeltas.append(candidatedelta)
+                    elif debug_search:
+                        msg = b"DBG-DELTAS-SEARCH:     DELTA: length=%d (BAD)\n"
+                        msg %= candidatedelta.deltalen
+                        self._write_debug(msg)
+                elif debug_search:
+                    msg = b"DBG-DELTAS-SEARCH:     NO-DELTA\n"
+                    self._write_debug(msg)
             if nominateddeltas:
                 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
             if deltainfo is not None:
@@ -1145,7 +1239,7 @@
         if deltainfo is None:
             dbg_type = b"full"
             deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
-        elif deltainfo.snapshotdepth:
+        elif deltainfo.snapshotdepth:  # pytype: disable=attribute-error
             dbg_type = b"snapshot"
         else:
             dbg_type = b"delta"
@@ -1161,8 +1255,13 @@
                 'p1-chain-len': p1_chain_len,
                 'p2-chain-len': p2_chain_len,
             }
-            if deltainfo.snapshotdepth is not None:
-                dbg['snapshot-depth'] = deltainfo.snapshotdepth
+            if (
+                deltainfo.snapshotdepth  # pytype: disable=attribute-error
+                is not None
+            ):
+                dbg[
+                    'snapshot-depth'
+                ] = deltainfo.snapshotdepth  # pytype: disable=attribute-error
             else:
                 dbg['snapshot-depth'] = 0
             target_revlog = b"UNKNOWN"
--- a/tests/test-completion.t	Thu May 19 23:39:42 2022 +0100
+++ b/tests/test-completion.t	Fri May 20 14:27:46 2022 +0200
@@ -74,6 +74,7 @@
 
 Show debug commands if there are no other candidates
   $ hg debugcomplete debug
+  debug-delta-find
   debug-repair-issue6528
   debugancestor
   debugantivirusrunning
@@ -267,6 +268,7 @@
   config: untrusted, exp-all-known, edit, local, source, shared, non-shared, global, template
   continue: dry-run
   copy: forget, after, at-rev, force, include, exclude, dry-run
+  debug-delta-find: changelog, manifest, dir, template
   debug-repair-issue6528: to-report, from-report, paranoid, dry-run
   debugancestor: 
   debugantivirusrunning: 
--- a/tests/test-help.t	Thu May 19 23:39:42 2022 +0100
+++ b/tests/test-help.t	Fri May 20 14:27:46 2022 +0200
@@ -978,6 +978,8 @@
   $ hg help debug
   debug commands (internal and unsupported):
   
+   debug-delta-find
+                 display the computation to get to a valid delta for storing REV
    debug-repair-issue6528
                  find affected revisions and repair them. See issue6528 for more
                  details.
--- a/tests/test-sparse-revlog.t	Thu May 19 23:39:42 2022 +0100
+++ b/tests/test-sparse-revlog.t	Fri May 20 14:27:46 2022 +0200
@@ -149,4 +149,46 @@
   deltas against p2    :   63 ( 1.36%)
   deltas against other :    0 ( 0.00%)
 
+
+Test `debug-delta-find`
+-----------------------
+
+  $ ls -1
+  SPARSE-REVLOG-TEST-FILE
+  $ hg debugdeltachain SPARSE-REVLOG-TEST-FILE | grep other | tail -1
+     4971       3        5     4930   other      19179     346472     427596   1.23414  15994877  15567281   36.40652     427596     179288   1.00000        5
+  $ hg debug-delta-find SPARSE-REVLOG-TEST-FILE 4971
+  DBG-DELTAS-SEARCH: SEARCH rev=4971
+  DBG-DELTAS-SEARCH: ROUND #1 - 2 candidates - search-down
+  DBG-DELTAS-SEARCH:   CANDIDATE: rev=4962
+  DBG-DELTAS-SEARCH:     type=snapshot-4
+  DBG-DELTAS-SEARCH:     size=18296
+  DBG-DELTAS-SEARCH:     base=4930
+  DBG-DELTAS-SEARCH:     uncompressed-delta-size=30377
+  DBG-DELTAS-SEARCH:     delta-search-time=* (glob)
+  DBG-DELTAS-SEARCH:     DELTA: length=16872 (BAD)
+  DBG-DELTAS-SEARCH:   CANDIDATE: rev=4971
+  DBG-DELTAS-SEARCH:     type=snapshot-4
+  DBG-DELTAS-SEARCH:     size=19179
+  DBG-DELTAS-SEARCH:     base=4930
+  DBG-DELTAS-SEARCH:     TOO-HIGH
+  DBG-DELTAS-SEARCH: ROUND #2 - 1 candidates - search-down
+  DBG-DELTAS-SEARCH:   CANDIDATE: rev=4930
+  DBG-DELTAS-SEARCH:     type=snapshot-3
+  DBG-DELTAS-SEARCH:     size=39228
+  DBG-DELTAS-SEARCH:     base=4799
+  DBG-DELTAS-SEARCH:     uncompressed-delta-size=33050
+  DBG-DELTAS-SEARCH:     delta-search-time=* (glob)
+  DBG-DELTAS-SEARCH:     DELTA: length=19179 (GOOD)
+  DBG-DELTAS-SEARCH: ROUND #3 - 1 candidates - refine-down
+  DBG-DELTAS-SEARCH:   CONTENDER: rev=4930 - length=19179
+  DBG-DELTAS-SEARCH:   CANDIDATE: rev=4799
+  DBG-DELTAS-SEARCH:     type=snapshot-2
+  DBG-DELTAS-SEARCH:     size=50213
+  DBG-DELTAS-SEARCH:     base=4623
+  DBG-DELTAS-SEARCH:     uncompressed-delta-size=82661
+  DBG-DELTAS-SEARCH:     delta-search-time=* (glob)
+  DBG-DELTAS-SEARCH:     DELTA: length=49132 (BAD)
+  DBG-DELTAS: FILELOG:SPARSE-REVLOG-TEST-FILE: rev=4971: search-rounds=3 try-count=3 - delta-type=snapshot snap-depth=4 - p1-chain-length=15 p2-chain-length=-1 - duration=* (glob)
+
   $ cd ..