commands: add debugdeltachain command
author Gregory Szorc <gregory.szorc@gmail.com>
Sat, 05 Dec 2015 23:37:46 -0800
changeset 27263 4efb36ecaaec
parent 27262 3d0feb2f978b
child 27264 e07003a94ef3
commands: add debugdeltachain command

We have debug commands for displaying overall revlog statistics (debugrevlog) and for dumping a revlog index (debugindex). As part of investigating various aspects of revlog behavior and performance, I found it important to have an understanding of how revlog delta chains behave in practice.

This patch implements a "debugdeltachain" command. For each revision in a revlog, it dumps information about the delta chain: which delta chain it is part of, the length of the delta chain, the distance since the base revision, info about the base revision, the size of the delta chain, etc. The generic formatting facility is used, which means we can templatize output and get machine-readable output like JSON.

This command has already uncovered some weird history in mozilla-central I didn't know about. So I think it's valuable.
mercurial/commands.py
tests/test-completion.t
tests/test-debugcommands.t
tests/test-help.t
--- a/mercurial/commands.py	Sat Oct 24 19:56:39 2015 +0100
+++ b/mercurial/commands.py	Sat Dec 05 23:37:46 2015 -0800
@@ -2501,6 +2501,117 @@
             ui.write("\t%d -> %d\n" % (r.rev(pp[1]), i))
     ui.write("}\n")
 
+@command('debugdeltachain',
+    debugrevlogopts + formatteropts,
+    _('-c|-m|FILE'),
+    optionalrepo=True)
+def debugdeltachain(ui, repo, file_=None, **opts):
+    """dump information about delta chains in a revlog
+
+    Output can be templatized. Available template keywords are:
+
+       rev          revision number
+       chainid      delta chain identifier (numbered by unique base)
+       chainlen     delta chain length to this revision
+       prevrev      previous revision in delta chain
+       deltatype    role of delta / how it was computed
+       compsize     compressed size of revision
+       uncompsize   uncompressed size of revision
+       chainsize    total size of compressed revisions in chain
+       chainratio   total chain size divided by uncompressed revision size
+                    (new delta chains typically start at ratio 2.00)
+       lindist      linear distance from base revision in delta chain to end
+                    of this revision
+       extradist    total size of revisions not part of this delta chain from
+                    base of delta chain to end of this revision; a measurement
+                    of how much extra data we need to read/seek across to read
+                    the delta chain for this revision
+       extraratio   extradist divided by chainsize; another representation of
+                    how much unrelated data is needed to load this delta chain
+    """
+    r = cmdutil.openrevlog(repo, 'debugdeltachain', file_, opts)
+    index = r.index
+    generaldelta = r.version & revlog.REVLOGGENERALDELTA
+
+    def revinfo(rev):
+        """return sizes, delta type and delta chain members for ``rev``"""
+        # e[1]/e[2]: compressed/uncompressed size, e[3]: base, e[5]/e[6]: p1/p2
+        e = index[rev]
+        compsize = e[1]
+        uncompsize = e[2]
+
+        if generaldelta:
+            if e[3] == e[5]:
+                deltatype = 'p1'
+            elif e[3] == e[6]:
+                deltatype = 'p2'
+            elif e[3] == rev - 1:
+                deltatype = 'prev'
+            elif e[3] == rev:
+                deltatype = 'base'
+            else:
+                deltatype = 'other'
+        elif e[3] == rev:
+            deltatype = 'base'
+        else:
+            deltatype = 'prev'
+
+        # walk back to the chain base: the revision whose base is itself
+        chain = []
+        chainsize = 0
+        iterrev = rev
+        while iterrev != e[3]:
+            chain.append(iterrev)
+            chainsize += e[1]
+            if generaldelta:
+                iterrev = e[3]
+            else:
+                iterrev -= 1
+            e = index[iterrev]
+        # the base revision is part of the chain as well
+        chain.append(iterrev)
+        chainsize += e[1]
+
+        chain.reverse()
+        return compsize, uncompsize, deltatype, chain, chainsize
+
+    fm = ui.formatter('debugdeltachain', opts)
+
+    fm.plain('    rev  chain# chainlen     prev   delta       '
+             'size    rawsize  chainsize     ratio   lindist extradist '
+             'extraratio\n')
+
+    chainbases = {}
+    for rev in r:
+        comp, uncomp, deltatype, chain, chainsize = revinfo(rev)
+        chainbase = chain[0]
+        chainid = chainbases.setdefault(chainbase, len(chainbases) + 1)
+        basestart = r.start(chainbase)
+        revstart = r.start(rev)
+        lineardist = revstart + comp - basestart
+        extradist = lineardist - chainsize
+        prevrev = chain[-2] if len(chain) > 1 else -1
+
+        # a size of 0 would raise ZeroDivisionError; divide by 1 instead
+        chainratio = float(chainsize) / (uncomp or 1)
+        extraratio = float(extradist) / (chainsize or 1)
+
+        fm.startitem()
+        fm.write('rev chainid chainlen prevrev deltatype compsize '
+                 'uncompsize chainsize chainratio lindist extradist '
+                 'extraratio',
+                 '%7d %7d %8d %8d %7s %10d %10d %10d %9.5f %9d %9d %10.5f\n',
+                 rev, chainid, len(chain), prevrev, deltatype, comp,
+                 uncomp, chainsize, chainratio, lineardist, extradist,
+                 extraratio,
+                 rev=rev, chainid=chainid, chainlen=len(chain),
+                 prevrev=prevrev, deltatype=deltatype, compsize=comp,
+                 uncompsize=uncomp, chainsize=chainsize,
+                 chainratio=chainratio, lindist=lineardist,
+                 extradist=extradist, extraratio=extraratio)
+
+    fm.end()
+
 @command('debuginstall', [], '', norepo=True)
 def debuginstall(ui):
     '''test Mercurial installation
--- a/tests/test-completion.t	Sat Oct 24 19:56:39 2015 +0100
+++ b/tests/test-completion.t	Sat Dec 05 23:37:46 2015 -0800
@@ -80,6 +80,7 @@
   debugdag
   debugdata
   debugdate
+  debugdeltachain
   debugdirstate
   debugdiscovery
   debugextensions
@@ -243,6 +244,7 @@
   debugdag: tags, branches, dots, spaces
   debugdata: changelog, manifest, dir
   debugdate: extended
+  debugdeltachain: changelog, manifest, dir, template
   debugdirstate: nodates, datesort
   debugdiscovery: old, nonheads, ssh, remotecmd, insecure
   debugextensions: template
--- a/tests/test-debugcommands.t	Sat Oct 24 19:56:39 2015 +0100
+++ b/tests/test-debugcommands.t	Sat Dec 05 23:37:46 2015 -0800
@@ -44,6 +44,32 @@
      rev flag   offset   length     size  .....   link     p1     p2                                   nodeid (re)
        0 0000        0        3        2   ....      0     -1     -1 b789fdd96dc2f3bd229c1dd8eedf0fc60e2b68e3 (re)
 
+debugdeltachain basic output
+
+  $ hg debugdeltachain -m
+      rev  chain# chainlen     prev   delta       size    rawsize  chainsize     ratio   lindist extradist extraratio
+        0       1        1       -1    base         44         43         44   1.02326        44         0    0.00000
+
+  $ hg debugdeltachain -m -T '{rev} {chainid} {chainlen}\n'
+  0 1 1
+
+  $ hg debugdeltachain -m -Tjson
+  [
+   {
+    "chainid": 1,
+    "chainlen": 1,
+    "chainratio": 1.02325581395,
+    "chainsize": 44,
+    "compsize": 44,
+    "deltatype": "base",
+    "extradist": 0,
+    "extraratio": 0.0,
+    "lindist": 44,
+    "prevrev": -1,
+    "rev": 0,
+    "uncompsize": 43
+   }
+  ]
 
 Test max chain len
   $ cat >> $HGRCPATH << EOF
--- a/tests/test-help.t	Sat Oct 24 19:56:39 2015 +0100
+++ b/tests/test-help.t	Sat Dec 05 23:37:46 2015 -0800
@@ -812,6 +812,8 @@
                  description
    debugdata     dump the contents of a data file revision
    debugdate     parse and display a date
+   debugdeltachain
+                 dump information about delta chains in a revlog
    debugdirstate
                  show the contents of the current dirstate
    debugdiscovery