contrib/dumprevlog
author Pierre-Yves David <pierre-yves.david@octobus.net>
Fri, 15 Mar 2024 01:31:57 +0100
branchstable
changeset 51505 c9ceb4f60256
parent 48875 6000f5b25c9b
permissions -rwxr-xr-x
phases: avoid N² behavior in `advanceboundary` We allowed duplicated entries in the deque, which each entry could potentially insert all its ancestors. So advancing boundary for the full repository would mean each revision would walk all its ancestors, resulting in O(N²) iteration. For repository of any decent size, N² is quickly insane. We introduce a simple set to avoid this and get back to reasonable performance.

#!/usr/bin/env python3
# Dump revlogs as raw data stream
# $ find .hg/store/ -name "*.i" | xargs dumprevlog > repo.dump


import sys
from mercurial.node import hex
from mercurial import (
    encoding,
    pycompat,
    revlog,
)
from mercurial.utils import procutil

from mercurial.revlogutils import (
    constants as revlog_constants,
)

for fp in (sys.stdin, sys.stdout, sys.stderr):
    procutil.setbinary(fp)


def binopen(path, mode=b'rb'):
    if b'b' not in mode:
        mode = mode + b'b'
    return open(path, pycompat.sysstr(mode))


binopen.options = {}


def printb(data, end=b'\n'):
    sys.stdout.flush()
    procutil.stdout.write(data + end)


for f in sys.argv[1:]:
    localf = encoding.strtolocal(f)
    if not localf.endswith(b'.i'):
        print("file:", f, file=sys.stderr)
        print("  invalid filename", file=sys.stderr)

    r = revlog.revlog(
        binopen,
        target=(revlog_constants.KIND_OTHER, b'dump-revlog'),
        radix=localf[:-2],
    )
    print("file:", f)
    for i in r:
        n = r.node(i)
        p = r.parents(n)
        d = r.revision(n)
        printb(b"node: %s" % hex(n))
        printb(b"linkrev: %d" % r.linkrev(i))
        printb(b"parents: %s %s" % (hex(p[0]), hex(p[1])))
        printb(b"length: %d" % len(d))
        printb(b"-start-")
        printb(d)
        printb(b"-end-")