hgext/automv.py
author Georges Racinet <georges.racinet@octobus.net>
Wed, 16 Jan 2019 16:19:26 +0100
changeset 41764 37ead13fb3d4
parent 41660 f89aad980025
child 42543 abd902a85040
permissions -rw-r--r--
rust-cpython: using rustext.dagop.headrevs in revlog As with the previous oxidation series, revlog plays the role of the factory, either using its parents function, or passing the index. We include below results of revsetbenchmarks.py taken on the PyPy repository on those of contrib/all-revsets.tx that involve `heads()`. In most of the cases, this seems to be either neutral or an improvement. In the cases where it's actually a bit slower, we suspect that differences in `heads()` performance is actually burried in variance on the incoming revset (probably several orders of magnitude slower). The precheck for filtered revisions of parent changeset has a significative performance benefit, too. Result by revset ================ Revision: 0) 0c7b353ce100; rust-cpython: binding for headrevs() 1) Parent of this changeset; changelog: prefilter in headrevs() 2) This changeset revset #0: heads(commonancestors(last(head(), 2))) plain min max first last reverse rev..rst rev..ast sort sor..rst sor..ast 0) 0.001379 0.001361 0.001381 0.001410 0.001393 0.001372 0.001414 0.001387 0.001411 0.001429 0.001415 1) 0.001351 0.001373 0.001383 0.001392 0.001401 0.001385 0.001405 0.001406 0.001385 0.001424 0.001399 2) 0.001365 0.001362 0.001375 0.001393 0.001370 0.001365 0.001413 0.001386 0.001377 0.001415 0.001411 revset #1: heads(commonancestors(head())) plain min max first last reverse rev..rst rev..ast sort sor..rst sor..ast 0) 0.047578 0.048578 0.047764 0.048065 0.047289 0.047305 0.047729 0.047370 0.047611 0.048005 0.047755 1) 0.048072 0.047471 0.048351 0.048193 0.048380 0.047968 0.047683 0.047355 0.048587 0.047044 0.048299 2) 0.047124 0.046699 0.046896 0.047250 0.046920 0.047379 0.046855 0.047753 0.047289 0.047219 0.046991 revset #2: heads(all()) plain min max first last reverse rev..rst rev..ast sort sor..rst sor..ast 0) 0.037654 0.037814 0.037149 0.037457 0.037609 0.037053 0.036825 0.037054 0.037739 0.036816 0.037604 1) 0.021845 58% 0.022172 58% 0.022148 59% 0.022059 58% 0.022261 
59% 0.022246 60% 0.021691 58% 0.021967 59% 0.022156 58% 0.021820 59% 0.023141 61% 2) 0.014459 66% 0.014470 65% 0.014420 65% 0.014413 65% 0.014421 64% 0.014492 65% 0.014512 66% 0.014579 66% 0.014500 65% 0.014501 66% 0.014537 62% revset #3: heads(-10000:-1) plain min max first last reverse rev..rst rev..ast sort sor..rst sor..ast 0) 0.003696 0.003681 0.003719 0.003746 0.003725 0.003750 0.003692 0.003747 0.003712 0.003754 0.003763 1) 0.002131 57% 0.002142 58% 0.002147 57% 0.002203 58% 0.002143 57% 0.002208 58% 0.002158 58% 0.002182 58% 0.002169 58% 0.002209 58% 0.002201 58% 2) 0.001490 69% 0.001524 71% 0.001515 70% 0.001528 69% 0.001531 71% 0.001520 68% 0.001549 71% 0.001542 70% 0.001560 71% 0.001559 70% 0.001544 70% revset #4: (-5000:-1000) and heads(-10000:-1) plain min max first last reverse rev..rst rev..ast sort sor..rst sor..ast 0) 0.003832 0.003816 0.003747 0.003814 0.003749 0.003894 0.003784 0.003796 0.003915 0.003829 0.003795 1) 0.002282 59% 0.002208 57% 0.002220 59% 0.002240 58% 0.002210 58% 0.002276 58% 0.002250 59% 0.002250 59% 0.002311 59% 0.002230 58% 0.002241 59% 2) 0.001658 72% 0.001662 75% 0.001568 70% 0.001599 71% 0.001588 71% 0.001696 74% 0.001615 71% 0.001593 70% 0.001710 73% 0.001622 72% 0.001616 72% revset #5: heads(matching(tip, "author")) plain min max first last reverse rev..rst rev..ast sort sor..rst sor..ast 0) 7.826449 7.563260 7.581034 7.688493 7.634001 7.777860 7.768228 8.026097 7.767422 7.565254 7.938643 1) 7.750766 7.562555 7.660426 7.574089 7.492220 7.438582 7.562015 7.530635 93% 7.636343 7.636712 7.645113 2) 7.617941 7.519601 7.584922 7.507653 7.547440 7.524436 7.575291 7.883991 7.792142 7.709622 7.868595 revset #6: heads(matching(tip, "author")) and -10000:-1 plain min max first last reverse rev..rst rev..ast sort sor..rst sor..ast 0) 7.744489 7.728684 7.734065 7.928513 7.875949 7.883727 7.815492 7.791335 7.784793 7.761218 7.815731 1) 7.808956 7.480446 7.618759 7.920270 7.676343 7.803613 7.770210 7.713100 7.584420 7.767335 7.825140 
2) 7.519987 7.938748 106% 7.805328 7.694162 7.750129 7.714229 7.603825 7.580734 7.555291 7.524207 7.504580 revset #7: (-10000:-1) and heads(matching(tip, "author")) plain min max first last reverse rev..rst rev..ast sort sor..rst sor..ast 0) 7.909321 7.694357 7.666021 7.538686 7.771821 7.876217 7.852103 7.812727 7.545919 7.788860 7.764585 1) 7.749232 7.683715 7.968393 7.895257 7.764160 8.314884 105% 7.921697 7.882613 7.867209 7.684707 7.544501 2) 7.824903 7.784605 7.727846 7.566613 7.581994 7.539205 90% 7.555316 7.535572 7.581786 7.901795 7.662832

# automv.py
#
# Copyright 2013-2016 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""check for unrecorded moves at commit time (EXPERIMENTAL)

This extension checks at commit/amend time if any of the committed files
comes from an unrecorded mv.

The threshold at which a file is considered a move can be set with the
``automv.similarity`` config option. This option takes a percentage between 0
(disabled) and 100 (files must be identical), the default is 95.

"""

# The default similarity of 95 is based on an analysis of the cpython,
# mozilla-central & mercurial repositories, as well as 2 very large
# facebook repositories. At 95, 50% of all potential missed moves would be
# caught, and the threshold corresponds with 87% of all explicitly marked
# moves.  Together, 80% of moved files are 95% similar or more.
#
# See http://markmail.org/thread/5pxnljesvufvom57 for context.

from __future__ import absolute_import

from mercurial.i18n import _
from mercurial import (
    commands,
    copies,
    error,
    extensions,
    pycompat,
    registrar,
    scmutil,
    similar
)

# Table of config options declared by this extension; ``configitem`` is the
# registration helper bound to that table (presumably it records each
# declared option in ``configtable`` -- confirm against ``registrar``).
configtable = {}
configitem = registrar.configitem(configtable)

# ``automv.similarity``: percentage threshold read by ``mvcheck`` via
# ``ui.configint``. 0 disables move detection, 100 requires identical files;
# values outside 0..100 make ``mvcheck`` abort. Defaults to 95.
configitem('automv', 'similarity',
    default=95,
)

def extsetup(ui):
    """Install ``mvcheck`` around the ``commit`` command.

    The ``commit`` entry of ``commands.table`` is wrapped with ``mvcheck``
    and a ``no-automv`` option entry is appended to the second element of
    the wrapped command entry, so users can switch detection off per
    invocation.
    """
    wrapped = extensions.wrapcommand(commands.table, 'commit', mvcheck)
    noautomv = ('', 'no-automv', None,
                _('disable automatic file move detection'))
    wrapped[1].append(noautomv)

def mvcheck(orig, ui, repo, *pats, **opts):
    """Hook to check for moves at commit time

    Wrapper installed around the ``commit`` command. Unless the
    ``no_automv`` option is set, it looks for added files that are similar
    enough to removed files, records them as renames, and then runs the
    wrapped command.

    ``orig`` is the wrapped command function; ``pats`` and ``opts`` are
    forwarded to it (``opts`` without the ``no_automv`` key). The return
    value is whatever ``orig`` returns.

    Raises ``error.Abort`` when ``automv.similarity`` is not within 0..100.
    """
    opts = pycompat.byteskwargs(opts)
    renames = None
    # Strip our own flag so the wrapped command never sees it.
    disabled = opts.pop('no_automv', False)

    # Take the working-copy lock *before* inspecting the working copy:
    # computing the status outside the lock leaves a window in which the
    # working copy can change between detection and recording of renames.
    with repo.wlock():
        if not disabled:
            threshold = ui.configint('automv', 'similarity')
            if not 0 <= threshold <= 100:
                raise error.Abort(
                    _('automv.similarity must be between 0 and 100'))
            # A threshold of 0 means detection is turned off.
            if threshold > 0:
                match = scmutil.match(repo[None], pats, opts)
                added, removed = _interestingfiles(repo, match)
                uipathfn = scmutil.getuipathfn(repo, legacyrelativevalue=True)
                # The config value is a percentage; _findrenames takes a
                # ratio.
                renames = _findrenames(repo, uipathfn, added, removed,
                                       threshold / 100.0)

        if renames is not None:
            scmutil._markchanges(repo, (), (), renames)
        return orig(ui, repo, *pats, **pycompat.strkwargs(opts))

def _interestingfiles(repo, matcher):
    """Collect the added and removed files selected by ``matcher``.

    Returns an ``(added, removed)`` pair. Files that already have copy
    information between the working directory parent and the working
    directory are left out of ``added``.

    """
    status = repo.status(match=matcher)

    # Copies already known from '.' to the working directory.
    knowncopies = copies._forwardcopies(repo['.'], repo[None], matcher)

    # Index 1 of the status result holds added files, index 2 removed ones.
    candidates = [path for path in status[1] if path not in knowncopies]
    return candidates, status[2]

def _findrenames(repo, uipathfn, added, removed, similarity):
    """Find what files in added are really moved files.

    Any file named in removed that is at least similarity% similar to a file
    in added is seen as a rename.

    """
    renames = {}
    if similarity > 0:
        for src, dst, score in similar.findrenames(
                repo, added, removed, similarity):
            if repo.ui.verbose:
                repo.ui.status(
                    _('detected move of %s as %s (%d%% similar)\n') % (
                        uipathfn(src), uipathfn(dst), score * 100))
            renames[dst] = src
    if renames:
        repo.ui.status(_('detected move of %d files\n') % len(renames))
    return renames