hgext/censor.py
author Pierre-Yves David <pierre-yves.david@octobus.net>
Tue, 09 Apr 2024 22:37:15 +0200
changeset 51595 3a6fae3bef35
parent 51270 ceeb8fa23cc8
permissions -rw-r--r--
outgoing: add a simple fastpath when there is no common This further speed up case like `hg bundle --all` for larger repository. ### data-env-vars.name = mozilla-try-2023-03-22-zstd-sparse-revlog # benchmark.name = hg.command.bundle # bin-env-vars.hg.flavor = default # bin-env-vars.hg.py-re2-module = default # benchmark.variants.revs = all # benchmark.variants.type = none-streamv2 before: 316.749699 after: 311.165461 (-1.76%, -5.58) There is further work to be done in this area like not doing any outgoing computation in the stream case for example. however the recent changes already gives use a large win for a small amount of local work. ### benchmark.name = hg.command.bundle # bin-env-vars.hg.flavor = default # bin-env-vars.hg.py-re2-module = default # benchmark.variants.revs = all # benchmark.variants.type = none-streamv2 ## data-env-vars.name = mercurial-public-2024-03-22-zstd-sparse-revlog pre-%ln-change: 1.263859 the-%ln-change: 0.700229 (-44.60%, -0.56) prev-changeset: 0.496050 (-60.75%, -0.77) this-changeset: 0.495243 (-60.81%, -0.77) ## data-env-vars.name = tryton-public-2024-03-22-zstd-sparse-revlog pre-%ln-change: 2.975765 the-%ln-change: 1.870798 (-37.13%, -1.10) prev-changeset: 1.461583 (-50.88%, -1.51) this-changeset: 1.469185 (-50.63%, -1.51) ## data-env-vars.name = pypy-2024-03-22-zstd-sparse-revlog pre-%ln-change: 4.540080 the-%ln-change: 3.401700 (-25.07%, -1.14) prev-changeset: 2.915810 (-35.78%, -1.62) this-changeset: 2.911643 (-35.87%, -1.63) ## data-env-vars.name = heptapod-public-2024-03-25-zstd-sparse-revlog pre-%ln-change: 10.138396 the-%ln-change: 7.750458 (-23.55%, -2.39) prev-changeset: 6.665565 (-34.25%, -3.47) this-changeset: 6.672078 (-34.19%, -3.47) ## data-env-vars.name = mozilla-try-2023-03-22-zstd-sparse-revlog pre-%ln-change: 399.484481 the-%ln-change: 346.508952 (-13.26%, -52.98) prev-changeset: 316.749699 (-20.71%, -82.73) this-changeset: 311.165461 (-22.11%, -88.32)

# Copyright (C) 2015 - Mike Edgar <adgar@google.com>
#
# This extension enables removal of file content at a given revision,
# rewriting the data/metadata of successive revisions to preserve revision log
# integrity.

"""erase file content at a given revision

The censor command instructs Mercurial to erase all content of a file at a given
revision *without updating the changeset hash.* This allows existing history to
remain valid while preventing future clones/pulls from receiving the erased
data.

Typical uses for censor are due to security or legal requirements, including::

 * Passwords, private keys, cryptographic material
 * Licensed data/code/libraries for which the license has expired
 * Personally Identifiable Information or other private data

Censored nodes can interrupt mercurial's typical operation whenever the excised
data needs to be materialized. Some commands, like ``hg cat``/``hg revert``,
simply fail when asked to produce censored data. Others, like ``hg verify`` and
``hg update``, must be capable of tolerating censored data to continue to
function in a meaningful way. Such commands only tolerate censored file
As having a censored version in a checkout is impractical. The current head
revisions of the repository are checked. If the revision to be censored is in
any of them the command will abort.

A few informative commands such as ``hg grep`` will unconditionally
ignore censored data and merely report that it was encountered.
"""


from mercurial.i18n import _
from mercurial.node import short

from mercurial import (
    error,
    registrar,
    scmutil,
)

cmdtable = {}
command = registrar.command(cmdtable)
# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
# be specifying the version(s) of Mercurial they are tested with, or
# leave the attribute unspecified.
testedwith = b'ships-with-hg-core'


@command(
    b'censor',
    [
        (
            b'r',
            b'rev',
            [],
            _(b'censor file from specified revision'),
            _(b'REV'),
        ),
        (
            b'',
            b'check-heads',
            True,
            _(b'check that repository heads are not affected'),
        ),
        (b't', b'tombstone', b'', _(b'replacement tombstone data'), _(b'TEXT')),
    ],
    _(b'-r REV [-t TEXT] [FILE]'),
    helpcategory=command.CATEGORY_MAINTENANCE,
)
def censor(ui, repo, path, rev=(), tombstone=b'', check_heads=True, **opts):
    with repo.wlock(), repo.lock():
        return _docensor(
            ui,
            repo,
            path,
            rev,
            tombstone,
            check_heads=check_heads,
            **opts,
        )


def _docensor(ui, repo, path, revs=(), tombstone=b'', check_heads=True, **opts):
    if not path:
        raise error.Abort(_(b'must specify file path to censor'))
    if not revs:
        raise error.Abort(_(b'must specify revisions to censor'))

    wctx = repo[None]

    m = scmutil.match(wctx, (path,))
    if m.anypats() or len(m.files()) != 1:
        raise error.Abort(_(b'can only specify an explicit filename'))
    path = m.files()[0]
    flog = repo.file(path)
    if not len(flog):
        raise error.Abort(_(b'cannot censor file with no history'))

    revs = scmutil.revrange(repo, revs)
    if not revs:
        raise error.Abort(_(b'no matching revisions'))
    file_nodes = set()
    for r in revs:
        try:
            ctx = repo[r]
            file_nodes.add(ctx.filectx(path).filenode())
        except error.LookupError:
            raise error.Abort(_(b'file does not exist at revision %s') % ctx)

    if check_heads:
        heads = []
        repo_heads = repo.heads()
        msg = b'checking for the censored content in %d heads\n'
        msg %= len(repo_heads)
        ui.status(msg)
        for headnode in repo_heads:
            hc = repo[headnode]
            if path in hc and hc.filenode(path) in file_nodes:
                heads.append(hc)
        if heads:
            headlist = b', '.join([short(c.node()) for c in heads])
            raise error.Abort(
                _(b'cannot censor file in heads (%s)') % headlist,
                hint=_(b'clean/delete and commit first'),
            )

    msg = b'checking for the censored content in the working directory\n'
    ui.status(msg)
    wp = wctx.parents()
    if ctx.node() in [p.node() for p in wp]:
        raise error.Abort(
            _(b'cannot censor working directory'),
            hint=_(b'clean/delete/update first'),
        )

    msg = b'censoring %d file revisions\n'
    msg %= len(file_nodes)
    ui.status(msg)
    with repo.transaction(b'censor') as tr:
        flog.censorrevision(tr, file_nodes, tombstone=tombstone)