contrib/undumprevlog
author Georges Racinet <georges.racinet@octobus.net>
Sat, 24 Apr 2021 16:30:05 +0200
branch stable
changeset 47011 b7e623ac98b6
parent 46113 59fa3890d40a
child 47072 4c041c71ec01
permissions -rwxr-xr-x
repoview: separate concerns in _filteredrepotypes comment. The cited issue in the Python bug tracker is closed, but hasn't been fixed. We've been able to use the attached example and reproduce it with Python 3.9. The point where it turns from needless stress on the GC into an actual leak is when one factors in the fact that, before Python 3.4, the GC was unable to collect some types (see PEP 442). Note that even with Python 2.7, the cycles due to __mro__ in the simple example are collectable. This was seen again with the example attached to the CPython issue.

#!/usr/bin/env python3
# Undump a dump from dumprevlog
# $ hg init
# $ undumprevlog < repo.dump

from __future__ import absolute_import, print_function

import sys
from mercurial.node import bin
from mercurial import (
    encoding,
    revlog,
    transaction,
    vfs as vfsmod,
)
from mercurial.utils import procutil

# Rebuild revlogs in the current directory from a dump read on stdin.
#
# The dump is a sequence of records made of "key: value" header lines
# ("file:", "node:", "linkrev:", "parents:", "length:").  A "length:" line is
# followed by a start-marker line, exactly that many bytes of revision data,
# and an end-marker line.  Each completed record is appended to the revlog
# opened by the most recent "file:" line.

for fp in (sys.stdin, sys.stdout, sys.stderr):
    procutil.setbinary(fp)

# Read the dump as bytes: "length:" is a byte count and the revision payload
# is raw data.  A text-mode stdin would count characters instead of bytes and
# decode/re-encode (and newline-translate) the payload on Python 3.
stdin = procutil.stdin

opener = vfsmod.vfs(b'.', False)
# Mercurial hands bytes messages to the report callback, so it must write to
# the binary stderr stream (a text-mode sys.stderr.write rejects bytes).
tr = transaction.transaction(
    procutil.stderr.write, opener, {b'store': opener}, b"undump.journal"
)
while True:
    l = stdin.readline()
    if not l:
        # EOF: no more records
        break
    # Every header value starts after "<key>: " and ends before the newline.
    if l.startswith(b"file:"):
        f = l[6:-1]
        r = revlog.revlog(opener, f)
        procutil.stdout.write(b'%s\n' % f)
    elif l.startswith(b"node:"):
        # Parsed (which validates the hex) but not passed to addrevision().
        n = bin(l[6:-1])
    elif l.startswith(b"linkrev:"):
        lr = int(l[9:-1])
    elif l.startswith(b"parents:"):
        p = l[9:-1].split()
        p1 = bin(p[0])
        p2 = bin(p[1])
    elif l.startswith(b"length:"):
        length = int(l[8:-1])
        stdin.readline()  # start marker
        d = stdin.read(length)
        stdin.readline()  # end marker
        # "length:" is the last header of a record, so linkrev and parents
        # have already been read at this point.
        r.addrevision(d, tr, lr, p1, p2)

tr.close()