debug-revlog: move the code in revlogutils module
authorPierre-Yves David <pierre-yves.david@octobus.net>
Mon, 07 Nov 2022 14:24:52 -0500
changeset 49660 bd3b6f363fb9
parent 49659 7c0a383849a8
child 49661 511106bcb16c
debug-revlog: move the code in revlogutils module We have a module dedicated to debug code, let us use it.
mercurial/debugcommands.py
mercurial/revlogutils/debug.py
--- a/mercurial/debugcommands.py	Mon Nov 07 14:13:59 2022 -0500
+++ b/mercurial/debugcommands.py	Mon Nov 07 14:24:52 2022 -0500
@@ -21,7 +21,6 @@
 import socket
 import ssl
 import stat
-import string
 import subprocess
 import sys
 import time
@@ -3241,303 +3240,9 @@
 
     if opts.get(b"dump"):
         revlog_debug.dump(ui, r)
-        return 0
-
-    format = r._format_version
-    v = r._format_flags
-    flags = []
-    gdelta = False
-    if v & revlog.FLAG_INLINE_DATA:
-        flags.append(b'inline')
-    if v & revlog.FLAG_GENERALDELTA:
-        gdelta = True
-        flags.append(b'generaldelta')
-    if not flags:
-        flags = [b'(none)']
-
-    ### tracks merge vs single parent
-    nummerges = 0
-
-    ### tracks ways the "delta" are build
-    # nodelta
-    numempty = 0
-    numemptytext = 0
-    numemptydelta = 0
-    # full file content
-    numfull = 0
-    # intermediate snapshot against a prior snapshot
-    numsemi = 0
-    # snapshot count per depth
-    numsnapdepth = collections.defaultdict(lambda: 0)
-    # delta against previous revision
-    numprev = 0
-    # delta against first or second parent (not prev)
-    nump1 = 0
-    nump2 = 0
-    # delta against neither prev nor parents
-    numother = 0
-    # delta against prev that are also first or second parent
-    # (details of `numprev`)
-    nump1prev = 0
-    nump2prev = 0
-
-    # data about delta chain of each revs
-    chainlengths = []
-    chainbases = []
-    chainspans = []
-
-    # data about each revision
-    datasize = [None, 0, 0]
-    fullsize = [None, 0, 0]
-    semisize = [None, 0, 0]
-    # snapshot count per depth
-    snapsizedepth = collections.defaultdict(lambda: [None, 0, 0])
-    deltasize = [None, 0, 0]
-    chunktypecounts = {}
-    chunktypesizes = {}
-
-    def addsize(size, l):
-        if l[0] is None or size < l[0]:
-            l[0] = size
-        if size > l[1]:
-            l[1] = size
-        l[2] += size
-
-    numrevs = len(r)
-    for rev in range(numrevs):
-        p1, p2 = r.parentrevs(rev)
-        delta = r.deltaparent(rev)
-        if format > 0:
-            addsize(r.rawsize(rev), datasize)
-        if p2 != nullrev:
-            nummerges += 1
-        size = r.length(rev)
-        if delta == nullrev:
-            chainlengths.append(0)
-            chainbases.append(r.start(rev))
-            chainspans.append(size)
-            if size == 0:
-                numempty += 1
-                numemptytext += 1
-            else:
-                numfull += 1
-                numsnapdepth[0] += 1
-                addsize(size, fullsize)
-                addsize(size, snapsizedepth[0])
-        else:
-            chainlengths.append(chainlengths[delta] + 1)
-            baseaddr = chainbases[delta]
-            revaddr = r.start(rev)
-            chainbases.append(baseaddr)
-            chainspans.append((revaddr - baseaddr) + size)
-            if size == 0:
-                numempty += 1
-                numemptydelta += 1
-            elif r.issnapshot(rev):
-                addsize(size, semisize)
-                numsemi += 1
-                depth = r.snapshotdepth(rev)
-                numsnapdepth[depth] += 1
-                addsize(size, snapsizedepth[depth])
-            else:
-                addsize(size, deltasize)
-                if delta == rev - 1:
-                    numprev += 1
-                    if delta == p1:
-                        nump1prev += 1
-                    elif delta == p2:
-                        nump2prev += 1
-                elif delta == p1:
-                    nump1 += 1
-                elif delta == p2:
-                    nump2 += 1
-                elif delta != nullrev:
-                    numother += 1
-
-        # Obtain data on the raw chunks in the revlog.
-        if util.safehasattr(r, b'_getsegmentforrevs'):
-            segment = r._getsegmentforrevs(rev, rev)[1]
-        else:
-            segment = r._revlog._getsegmentforrevs(rev, rev)[1]
-        if segment:
-            chunktype = bytes(segment[0:1])
-        else:
-            chunktype = b'empty'
-
-        if chunktype not in chunktypecounts:
-            chunktypecounts[chunktype] = 0
-            chunktypesizes[chunktype] = 0
-
-        chunktypecounts[chunktype] += 1
-        chunktypesizes[chunktype] += size
-
-    # Adjust size min value for empty cases
-    for size in (datasize, fullsize, semisize, deltasize):
-        if size[0] is None:
-            size[0] = 0
-
-    numdeltas = numrevs - numfull - numempty - numsemi
-    numoprev = numprev - nump1prev - nump2prev
-    totalrawsize = datasize[2]
-    datasize[2] /= numrevs
-    fulltotal = fullsize[2]
-    if numfull == 0:
-        fullsize[2] = 0
     else:
-        fullsize[2] /= numfull
-    semitotal = semisize[2]
-    snaptotal = {}
-    if numsemi > 0:
-        semisize[2] /= numsemi
-    for depth in snapsizedepth:
-        snaptotal[depth] = snapsizedepth[depth][2]
-        snapsizedepth[depth][2] /= numsnapdepth[depth]
-
-    deltatotal = deltasize[2]
-    if numdeltas > 0:
-        deltasize[2] /= numdeltas
-    totalsize = fulltotal + semitotal + deltatotal
-    avgchainlen = sum(chainlengths) / numrevs
-    maxchainlen = max(chainlengths)
-    maxchainspan = max(chainspans)
-    compratio = 1
-    if totalsize:
-        compratio = totalrawsize / totalsize
-
-    basedfmtstr = b'%%%dd\n'
-    basepcfmtstr = b'%%%dd %s(%%5.2f%%%%)\n'
-
-    def dfmtstr(max):
-        return basedfmtstr % len(str(max))
-
-    def pcfmtstr(max, padding=0):
-        return basepcfmtstr % (len(str(max)), b' ' * padding)
-
-    def pcfmt(value, total):
-        if total:
-            return (value, 100 * float(value) / total)
-        else:
-            return value, 100.0
-
-    ui.writenoi18n(b'format : %d\n' % format)
-    ui.writenoi18n(b'flags  : %s\n' % b', '.join(flags))
-
-    ui.write(b'\n')
-    fmt = pcfmtstr(totalsize)
-    fmt2 = dfmtstr(totalsize)
-    ui.writenoi18n(b'revisions     : ' + fmt2 % numrevs)
-    ui.writenoi18n(b'    merges    : ' + fmt % pcfmt(nummerges, numrevs))
-    ui.writenoi18n(
-        b'    normal    : ' + fmt % pcfmt(numrevs - nummerges, numrevs)
-    )
-    ui.writenoi18n(b'revisions     : ' + fmt2 % numrevs)
-    ui.writenoi18n(b'    empty     : ' + fmt % pcfmt(numempty, numrevs))
-    ui.writenoi18n(
-        b'                   text  : '
-        + fmt % pcfmt(numemptytext, numemptytext + numemptydelta)
-    )
-    ui.writenoi18n(
-        b'                   delta : '
-        + fmt % pcfmt(numemptydelta, numemptytext + numemptydelta)
-    )
-    ui.writenoi18n(
-        b'    snapshot  : ' + fmt % pcfmt(numfull + numsemi, numrevs)
-    )
-    for depth in sorted(numsnapdepth):
-        ui.write(
-            (b'      lvl-%-3d :       ' % depth)
-            + fmt % pcfmt(numsnapdepth[depth], numrevs)
-        )
-    ui.writenoi18n(b'    deltas    : ' + fmt % pcfmt(numdeltas, numrevs))
-    ui.writenoi18n(b'revision size : ' + fmt2 % totalsize)
-    ui.writenoi18n(
-        b'    snapshot  : ' + fmt % pcfmt(fulltotal + semitotal, totalsize)
-    )
-    for depth in sorted(numsnapdepth):
-        ui.write(
-            (b'      lvl-%-3d :       ' % depth)
-            + fmt % pcfmt(snaptotal[depth], totalsize)
-        )
-    ui.writenoi18n(b'    deltas    : ' + fmt % pcfmt(deltatotal, totalsize))
-
-    def fmtchunktype(chunktype):
-        if chunktype == b'empty':
-            return b'    %s     : ' % chunktype
-        elif chunktype in pycompat.bytestr(string.ascii_letters):
-            return b'    0x%s (%s)  : ' % (hex(chunktype), chunktype)
-        else:
-            return b'    0x%s      : ' % hex(chunktype)
-
-    ui.write(b'\n')
-    ui.writenoi18n(b'chunks        : ' + fmt2 % numrevs)
-    for chunktype in sorted(chunktypecounts):
-        ui.write(fmtchunktype(chunktype))
-        ui.write(fmt % pcfmt(chunktypecounts[chunktype], numrevs))
-    ui.writenoi18n(b'chunks size   : ' + fmt2 % totalsize)
-    for chunktype in sorted(chunktypecounts):
-        ui.write(fmtchunktype(chunktype))
-        ui.write(fmt % pcfmt(chunktypesizes[chunktype], totalsize))
-
-    ui.write(b'\n')
-    fmt = dfmtstr(max(avgchainlen, maxchainlen, maxchainspan, compratio))
-    ui.writenoi18n(b'avg chain length  : ' + fmt % avgchainlen)
-    ui.writenoi18n(b'max chain length  : ' + fmt % maxchainlen)
-    ui.writenoi18n(b'max chain reach   : ' + fmt % maxchainspan)
-    ui.writenoi18n(b'compression ratio : ' + fmt % compratio)
-
-    if format > 0:
-        ui.write(b'\n')
-        ui.writenoi18n(
-            b'uncompressed data size (min/max/avg) : %d / %d / %d\n'
-            % tuple(datasize)
-        )
-    ui.writenoi18n(
-        b'full revision size (min/max/avg)     : %d / %d / %d\n'
-        % tuple(fullsize)
-    )
-    ui.writenoi18n(
-        b'inter-snapshot size (min/max/avg)    : %d / %d / %d\n'
-        % tuple(semisize)
-    )
-    for depth in sorted(snapsizedepth):
-        if depth == 0:
-            continue
-        ui.writenoi18n(
-            b'    level-%-3d (min/max/avg)          : %d / %d / %d\n'
-            % ((depth,) + tuple(snapsizedepth[depth]))
-        )
-    ui.writenoi18n(
-        b'delta size (min/max/avg)             : %d / %d / %d\n'
-        % tuple(deltasize)
-    )
-
-    if numdeltas > 0:
-        ui.write(b'\n')
-        fmt = pcfmtstr(numdeltas)
-        fmt2 = pcfmtstr(numdeltas, 4)
-        ui.writenoi18n(
-            b'deltas against prev  : ' + fmt % pcfmt(numprev, numdeltas)
-        )
-        if numprev > 0:
-            ui.writenoi18n(
-                b'    where prev = p1  : ' + fmt2 % pcfmt(nump1prev, numprev)
-            )
-            ui.writenoi18n(
-                b'    where prev = p2  : ' + fmt2 % pcfmt(nump2prev, numprev)
-            )
-            ui.writenoi18n(
-                b'    other            : ' + fmt2 % pcfmt(numoprev, numprev)
-            )
-        if gdelta:
-            ui.writenoi18n(
-                b'deltas against p1    : ' + fmt % pcfmt(nump1, numdeltas)
-            )
-            ui.writenoi18n(
-                b'deltas against p2    : ' + fmt % pcfmt(nump2, numdeltas)
-            )
-            ui.writenoi18n(
-                b'deltas against other : ' + fmt % pcfmt(numother, numdeltas)
-            )
+        revlog_debug.debug_revlog(ui, r)
+    return 0
 
 
 @command(
--- a/mercurial/revlogutils/debug.py	Mon Nov 07 14:13:59 2022 -0500
+++ b/mercurial/revlogutils/debug.py	Mon Nov 07 14:24:52 2022 -0500
@@ -6,8 +6,12 @@
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
+import collections
+import string
+
 from .. import (
     node as nodemod,
+    util,
 )
 
 from . import (
@@ -267,3 +271,305 @@
                 clen,
             )
         )
+
+
+def debug_revlog(ui, revlog):
+    """code for `hg debugrevlog`"""
+    r = revlog
+    format = r._format_version
+    v = r._format_flags
+    flags = []
+    gdelta = False
+    if v & constants.FLAG_INLINE_DATA:
+        flags.append(b'inline')
+    if v & constants.FLAG_GENERALDELTA:
+        gdelta = True
+        flags.append(b'generaldelta')
+    if not flags:
+        flags = [b'(none)']
+
+    ### tracks merge vs single parent
+    nummerges = 0
+
+    ### tracks ways the "delta" are build
+    # nodelta
+    numempty = 0
+    numemptytext = 0
+    numemptydelta = 0
+    # full file content
+    numfull = 0
+    # intermediate snapshot against a prior snapshot
+    numsemi = 0
+    # snapshot count per depth
+    numsnapdepth = collections.defaultdict(lambda: 0)
+    # delta against previous revision
+    numprev = 0
+    # delta against first or second parent (not prev)
+    nump1 = 0
+    nump2 = 0
+    # delta against neither prev nor parents
+    numother = 0
+    # delta against prev that are also first or second parent
+    # (details of `numprev`)
+    nump1prev = 0
+    nump2prev = 0
+
+    # data about delta chain of each revs
+    chainlengths = []
+    chainbases = []
+    chainspans = []
+
+    # data about each revision
+    datasize = [None, 0, 0]
+    fullsize = [None, 0, 0]
+    semisize = [None, 0, 0]
+    # snapshot count per depth
+    snapsizedepth = collections.defaultdict(lambda: [None, 0, 0])
+    deltasize = [None, 0, 0]
+    chunktypecounts = {}
+    chunktypesizes = {}
+
+    def addsize(size, l):
+        if l[0] is None or size < l[0]:
+            l[0] = size
+        if size > l[1]:
+            l[1] = size
+        l[2] += size
+
+    numrevs = len(r)
+    for rev in range(numrevs):
+        p1, p2 = r.parentrevs(rev)
+        delta = r.deltaparent(rev)
+        if format > 0:
+            addsize(r.rawsize(rev), datasize)
+        if p2 != nodemod.nullrev:
+            nummerges += 1
+        size = r.length(rev)
+        if delta == nodemod.nullrev:
+            chainlengths.append(0)
+            chainbases.append(r.start(rev))
+            chainspans.append(size)
+            if size == 0:
+                numempty += 1
+                numemptytext += 1
+            else:
+                numfull += 1
+                numsnapdepth[0] += 1
+                addsize(size, fullsize)
+                addsize(size, snapsizedepth[0])
+        else:
+            chainlengths.append(chainlengths[delta] + 1)
+            baseaddr = chainbases[delta]
+            revaddr = r.start(rev)
+            chainbases.append(baseaddr)
+            chainspans.append((revaddr - baseaddr) + size)
+            if size == 0:
+                numempty += 1
+                numemptydelta += 1
+            elif r.issnapshot(rev):
+                addsize(size, semisize)
+                numsemi += 1
+                depth = r.snapshotdepth(rev)
+                numsnapdepth[depth] += 1
+                addsize(size, snapsizedepth[depth])
+            else:
+                addsize(size, deltasize)
+                if delta == rev - 1:
+                    numprev += 1
+                    if delta == p1:
+                        nump1prev += 1
+                    elif delta == p2:
+                        nump2prev += 1
+                elif delta == p1:
+                    nump1 += 1
+                elif delta == p2:
+                    nump2 += 1
+                elif delta != nodemod.nullrev:
+                    numother += 1
+
+        # Obtain data on the raw chunks in the revlog.
+        if util.safehasattr(r, '_getsegmentforrevs'):
+            segment = r._getsegmentforrevs(rev, rev)[1]
+        else:
+            segment = r._revlog._getsegmentforrevs(rev, rev)[1]
+        if segment:
+            chunktype = bytes(segment[0:1])
+        else:
+            chunktype = b'empty'
+
+        if chunktype not in chunktypecounts:
+            chunktypecounts[chunktype] = 0
+            chunktypesizes[chunktype] = 0
+
+        chunktypecounts[chunktype] += 1
+        chunktypesizes[chunktype] += size
+
+    # Adjust size min value for empty cases
+    for size in (datasize, fullsize, semisize, deltasize):
+        if size[0] is None:
+            size[0] = 0
+
+    numdeltas = numrevs - numfull - numempty - numsemi
+    numoprev = numprev - nump1prev - nump2prev
+    totalrawsize = datasize[2]
+    datasize[2] /= numrevs
+    fulltotal = fullsize[2]
+    if numfull == 0:
+        fullsize[2] = 0
+    else:
+        fullsize[2] /= numfull
+    semitotal = semisize[2]
+    snaptotal = {}
+    if numsemi > 0:
+        semisize[2] /= numsemi
+    for depth in snapsizedepth:
+        snaptotal[depth] = snapsizedepth[depth][2]
+        snapsizedepth[depth][2] /= numsnapdepth[depth]
+
+    deltatotal = deltasize[2]
+    if numdeltas > 0:
+        deltasize[2] /= numdeltas
+    totalsize = fulltotal + semitotal + deltatotal
+    avgchainlen = sum(chainlengths) / numrevs
+    maxchainlen = max(chainlengths)
+    maxchainspan = max(chainspans)
+    compratio = 1
+    if totalsize:
+        compratio = totalrawsize / totalsize
+
+    basedfmtstr = b'%%%dd\n'
+    basepcfmtstr = b'%%%dd %s(%%5.2f%%%%)\n'
+
+    def dfmtstr(max):
+        return basedfmtstr % len(str(max))
+
+    def pcfmtstr(max, padding=0):
+        return basepcfmtstr % (len(str(max)), b' ' * padding)
+
+    def pcfmt(value, total):
+        if total:
+            return (value, 100 * float(value) / total)
+        else:
+            return value, 100.0
+
+    ui.writenoi18n(b'format : %d\n' % format)
+    ui.writenoi18n(b'flags  : %s\n' % b', '.join(flags))
+
+    ui.write(b'\n')
+    fmt = pcfmtstr(totalsize)
+    fmt2 = dfmtstr(totalsize)
+    ui.writenoi18n(b'revisions     : ' + fmt2 % numrevs)
+    ui.writenoi18n(b'    merges    : ' + fmt % pcfmt(nummerges, numrevs))
+    ui.writenoi18n(
+        b'    normal    : ' + fmt % pcfmt(numrevs - nummerges, numrevs)
+    )
+    ui.writenoi18n(b'revisions     : ' + fmt2 % numrevs)
+    ui.writenoi18n(b'    empty     : ' + fmt % pcfmt(numempty, numrevs))
+    ui.writenoi18n(
+        b'                   text  : '
+        + fmt % pcfmt(numemptytext, numemptytext + numemptydelta)
+    )
+    ui.writenoi18n(
+        b'                   delta : '
+        + fmt % pcfmt(numemptydelta, numemptytext + numemptydelta)
+    )
+    ui.writenoi18n(
+        b'    snapshot  : ' + fmt % pcfmt(numfull + numsemi, numrevs)
+    )
+    for depth in sorted(numsnapdepth):
+        ui.write(
+            (b'      lvl-%-3d :       ' % depth)
+            + fmt % pcfmt(numsnapdepth[depth], numrevs)
+        )
+    ui.writenoi18n(b'    deltas    : ' + fmt % pcfmt(numdeltas, numrevs))
+    ui.writenoi18n(b'revision size : ' + fmt2 % totalsize)
+    ui.writenoi18n(
+        b'    snapshot  : ' + fmt % pcfmt(fulltotal + semitotal, totalsize)
+    )
+    for depth in sorted(numsnapdepth):
+        ui.write(
+            (b'      lvl-%-3d :       ' % depth)
+            + fmt % pcfmt(snaptotal[depth], totalsize)
+        )
+    ui.writenoi18n(b'    deltas    : ' + fmt % pcfmt(deltatotal, totalsize))
+
+    letters = string.ascii_letters.encode('ascii')
+
+    def fmtchunktype(chunktype):
+        if chunktype == b'empty':
+            return b'    %s     : ' % chunktype
+        elif chunktype in letters:
+            return b'    0x%s (%s)  : ' % (nodemod.hex(chunktype), chunktype)
+        else:
+            return b'    0x%s      : ' % nodemod.hex(chunktype)
+
+    ui.write(b'\n')
+    ui.writenoi18n(b'chunks        : ' + fmt2 % numrevs)
+    for chunktype in sorted(chunktypecounts):
+        ui.write(fmtchunktype(chunktype))
+        ui.write(fmt % pcfmt(chunktypecounts[chunktype], numrevs))
+    ui.writenoi18n(b'chunks size   : ' + fmt2 % totalsize)
+    for chunktype in sorted(chunktypecounts):
+        ui.write(fmtchunktype(chunktype))
+        ui.write(fmt % pcfmt(chunktypesizes[chunktype], totalsize))
+
+    ui.write(b'\n')
+    fmt = dfmtstr(max(avgchainlen, maxchainlen, maxchainspan, compratio))
+    ui.writenoi18n(b'avg chain length  : ' + fmt % avgchainlen)
+    ui.writenoi18n(b'max chain length  : ' + fmt % maxchainlen)
+    ui.writenoi18n(b'max chain reach   : ' + fmt % maxchainspan)
+    ui.writenoi18n(b'compression ratio : ' + fmt % compratio)
+
+    if format > 0:
+        ui.write(b'\n')
+        ui.writenoi18n(
+            b'uncompressed data size (min/max/avg) : %d / %d / %d\n'
+            % tuple(datasize)
+        )
+    ui.writenoi18n(
+        b'full revision size (min/max/avg)     : %d / %d / %d\n'
+        % tuple(fullsize)
+    )
+    ui.writenoi18n(
+        b'inter-snapshot size (min/max/avg)    : %d / %d / %d\n'
+        % tuple(semisize)
+    )
+    for depth in sorted(snapsizedepth):
+        if depth == 0:
+            continue
+        ui.writenoi18n(
+            b'    level-%-3d (min/max/avg)          : %d / %d / %d\n'
+            % ((depth,) + tuple(snapsizedepth[depth]))
+        )
+    ui.writenoi18n(
+        b'delta size (min/max/avg)             : %d / %d / %d\n'
+        % tuple(deltasize)
+    )
+
+    if numdeltas > 0:
+        ui.write(b'\n')
+        fmt = pcfmtstr(numdeltas)
+        fmt2 = pcfmtstr(numdeltas, 4)
+        ui.writenoi18n(
+            b'deltas against prev  : ' + fmt % pcfmt(numprev, numdeltas)
+        )
+        if numprev > 0:
+            ui.writenoi18n(
+                b'    where prev = p1  : ' + fmt2 % pcfmt(nump1prev, numprev)
+            )
+            ui.writenoi18n(
+                b'    where prev = p2  : ' + fmt2 % pcfmt(nump2prev, numprev)
+            )
+            ui.writenoi18n(
+                b'    other            : ' + fmt2 % pcfmt(numoprev, numprev)
+            )
+        if gdelta:
+            ui.writenoi18n(
+                b'deltas against p1    : ' + fmt % pcfmt(nump1, numdeltas)
+            )
+            ui.writenoi18n(
+                b'deltas against p2    : ' + fmt % pcfmt(nump2, numdeltas)
+            )
+            ui.writenoi18n(
+                b'deltas against other : ' + fmt % pcfmt(numother, numdeltas)
+            )