hgext/automv.py
author Pierre-Yves David <pierre-yves.david@octobus.net>
Tue, 09 Apr 2024 22:37:15 +0200
changeset 51595 3a6fae3bef35
parent 50870 47af00b2217f
permissions -rw-r--r--
outgoing: add a simple fastpath when there is no common This further speed up case like `hg bundle --all` for larger repository. ### data-env-vars.name = mozilla-try-2023-03-22-zstd-sparse-revlog # benchmark.name = hg.command.bundle # bin-env-vars.hg.flavor = default # bin-env-vars.hg.py-re2-module = default # benchmark.variants.revs = all # benchmark.variants.type = none-streamv2 before: 316.749699 after: 311.165461 (-1.76%, -5.58) There is further work to be done in this area like not doing any outgoing computation in the stream case for example. however the recent changes already gives use a large win for a small amount of local work. ### benchmark.name = hg.command.bundle # bin-env-vars.hg.flavor = default # bin-env-vars.hg.py-re2-module = default # benchmark.variants.revs = all # benchmark.variants.type = none-streamv2 ## data-env-vars.name = mercurial-public-2024-03-22-zstd-sparse-revlog pre-%ln-change: 1.263859 the-%ln-change: 0.700229 (-44.60%, -0.56) prev-changeset: 0.496050 (-60.75%, -0.77) this-changeset: 0.495243 (-60.81%, -0.77) ## data-env-vars.name = tryton-public-2024-03-22-zstd-sparse-revlog pre-%ln-change: 2.975765 the-%ln-change: 1.870798 (-37.13%, -1.10) prev-changeset: 1.461583 (-50.88%, -1.51) this-changeset: 1.469185 (-50.63%, -1.51) ## data-env-vars.name = pypy-2024-03-22-zstd-sparse-revlog pre-%ln-change: 4.540080 the-%ln-change: 3.401700 (-25.07%, -1.14) prev-changeset: 2.915810 (-35.78%, -1.62) this-changeset: 2.911643 (-35.87%, -1.63) ## data-env-vars.name = heptapod-public-2024-03-25-zstd-sparse-revlog pre-%ln-change: 10.138396 the-%ln-change: 7.750458 (-23.55%, -2.39) prev-changeset: 6.665565 (-34.25%, -3.47) this-changeset: 6.672078 (-34.19%, -3.47) ## data-env-vars.name = mozilla-try-2023-03-22-zstd-sparse-revlog pre-%ln-change: 399.484481 the-%ln-change: 346.508952 (-13.26%, -52.98) prev-changeset: 316.749699 (-20.71%, -82.73) this-changeset: 311.165461 (-22.11%, -88.32)

# automv.py
#
# Copyright 2013-2016 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""check for unrecorded moves at commit time (EXPERIMENTAL)

This extension checks at commit/amend time if any of the committed files
comes from an unrecorded mv.

The threshold at which a file is considered a move can be set with the
``automv.similarity`` config option. This option takes a percentage between 0
(disabled) and 100 (files must be identical), the default is 95.

"""

# Using 95 as a default similarity is based on an analysis of the mercurial
# repositories of the cpython, mozilla-central & mercurial repositories, as
# well as 2 very large facebook repositories. At 95 50% of all potential
# missed moves would be caught, as well as correspond with 87% of all
# explicitly marked moves.  Together, 80% of moved files are 95% similar or
# more.
#
# See http://markmail.org/thread/5pxnljesvufvom57 for context.


from mercurial.i18n import _
from mercurial import (
    commands,
    copies,
    error,
    extensions,
    pycompat,
    registrar,
    scmutil,
    similar,
)

configtable = {}
configitem = registrar.configitem(configtable)

configitem(
    b'automv',
    b'similarity',
    default=95,
)


def extsetup(ui):
    entry = extensions.wrapcommand(commands.table, b'commit', mvcheck)
    entry[1].append(
        (b'', b'no-automv', None, _(b'disable automatic file move detection'))
    )


def mvcheck(orig, ui, repo, *pats, **opts):
    """Hook to check for moves at commit time"""
    renames = None
    disabled = opts.pop('no_automv', False)
    with repo.wlock():
        if not disabled:
            threshold = ui.configint(b'automv', b'similarity')
            if not 0 <= threshold <= 100:
                raise error.Abort(
                    _(b'automv.similarity must be between 0 and 100')
                )
            if threshold > 0:
                match = scmutil.match(
                    repo[None], pats, pycompat.byteskwargs(opts)
                )
                added, removed = _interestingfiles(repo, match)
                uipathfn = scmutil.getuipathfn(repo, legacyrelativevalue=True)
                renames = _findrenames(
                    repo, uipathfn, added, removed, threshold / 100.0
                )

        if renames is not None:
            with repo.dirstate.changing_files(repo):
                # XXX this should be wider and integrated with the commit
                # transaction. At the same time as we do the `addremove` logic
                # for commit.  However we can't really do better with the
                # current extension structure, and this is not worse than what
                # happened before.
                scmutil._markchanges(repo, (), (), renames)
        return orig(ui, repo, *pats, **opts)


def _interestingfiles(repo, matcher):
    """Find what files were added or removed in this commit.

    Returns a tuple of two lists: (added, removed). Only files not *already*
    marked as moved are included in the added list.

    """
    stat = repo.status(match=matcher)
    added = stat.added
    removed = stat.removed

    copy = copies.pathcopies(repo[b'.'], repo[None], matcher)
    # remove the copy files for which we already have copy info
    added = [f for f in added if f not in copy]

    return added, removed


def _findrenames(repo, uipathfn, added, removed, similarity):
    """Find what files in added are really moved files.

    Any file named in removed that is at least similarity% similar to a file
    in added is seen as a rename.

    """
    renames = {}
    if similarity > 0:
        for src, dst, score in similar.findrenames(
            repo, added, removed, similarity
        ):
            if repo.ui.verbose:
                repo.ui.status(
                    _(b'detected move of %s as %s (%d%% similar)\n')
                    % (uipathfn(src), uipathfn(dst), score * 100)
                )
            renames[dst] = src
    if renames:
        repo.ui.status(_(b'detected move of %d files\n') % len(renames))
    return renames