hgext/schemes.py
author Pierre-Yves David <pierre-yves.david@octobus.net>
Sat, 13 Apr 2024 23:40:28 +0200
changeset 51592 24844407fa0d
parent 50791 d51a76b5262b
permissions -rw-r--r--
perf: clear vfs audit_cache before each run When generating a stream clone, we spend a large amount of time auditing path. Before this changes, the first run was warming the vfs cache for the other runs, leading to a large runtime difference and a "faulty" reported timing for the operation. We now clear this important cache between run to get a more realistic timing. Below are some example of median time change when clearing these cases. The maximum time for a run did not changed significantly. ### data-env-vars.name = mozilla-central-2018-08-01-zstd-sparse-revlog # benchmark.name = hg.perf.exchange.stream.generate # bin-env-vars.hg.flavor = default # bin-env-vars.hg.py-re2-module = default # benchmark.variants.version = latest no-clearing: 17.289905 cache-clearing: 21.587965 (+24.86%, +4.30) ## data-env-vars.name = mozilla-central-2024-03-22-zstd-sparse-revlog no-clearing: 32.670748 cache-clearing: 40.467095 (+23.86%, +7.80) ## data-env-vars.name = mozilla-try-2019-02-18-zstd-sparse-revlog no-clearing: 37.838858 cache-clearing: 46.072749 (+21.76%, +8.23) ## data-env-vars.name = mozilla-unified-2024-03-22-zstd-sparse-revlog no-clearing: 32.969395 cache-clearing: 39.646209 (+20.25%, +6.68) In addition, this significantly reduce the timing difference between the performance command, from the perf extensions and a `real `hg bundle` call producing a stream bundle. Some significant differences remain especially on the "mozilla-try" repositories, but they are now smaller. Note that some of that difference will actually not be attributable to the stream generation (like maybe phases or branch map computation). Below are some benchmarks done on a currently draft changeset fixing some unrelated slowness in `hg bundle` (34a78972af409d1ff37c29e60f6ca811ad1a457d) ### data-env-vars.name = mozilla-central-2018-08-01-zstd-sparse-revlog # bin-env-vars.hg.flavor = default # bin-env-vars.hg.py-re2-module = default hg.perf.exchange.stream.generate: 21.587965 hg.command.bundle: 24.301799 (+12.57%, +2.71) ## data-env-vars.name = mozilla-central-2024-03-22-zstd-sparse-revlog hg.perf.exchange.stream.generate: 40.467095 hg.command.bundle: 44.831317 (+10.78%, +4.36) ## data-env-vars.name = mozilla-unified-2024-03-22-zstd-sparse-revlog hg.perf.exchange.stream.generate: 39.646209 hg.command.bundle: 45.395258 (+14.50%, +5.75) ## data-env-vars.name = mozilla-try-2019-02-18-zstd-sparse-revlog hg.perf.exchange.stream.generate: 46.072749 hg.command.bundle: 55.882608 (+21.29%, +9.81) ## data-env-vars.name = mozilla-try-2023-03-22-zlib-general-delta hg.perf.exchange.stream.generate: 334.716708 hg.command.bundle: 377.856767 (+12.89%, +43.14) ## data-env-vars.name = mozilla-try-2023-03-22-zstd-sparse-revlog hg.perf.exchange.stream.generate: 302.972301 hg.command.bundle: 326.098755 (+7.63%, +23.13)

# Copyright 2009, Alexander Solovyov <piranha@piranha.org.ua>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""extend schemes with shortcuts to repository swarms

This extension allows you to specify shortcuts for parent URLs with a
lot of repositories to act like a scheme, for example::

  [schemes]
  py = http://code.python.org/hg/

After that you can use it like::

  hg clone py://trunk/

Additionally there is support for some more complex schemas, for
example used by Google Code::

  [schemes]
  gcode = http://{1}.googlecode.com/hg/

The syntax is taken from Mercurial templates, and you have unlimited
number of variables, starting with ``{1}`` and continuing with
``{2}``, ``{3}`` and so on. This variables will receive parts of URL
supplied, split by ``/``. Anything not specified as ``{part}`` will be
just appended to an URL.

For convenience, the extension adds these schemes by default::

  [schemes]
  py = http://hg.python.org/
  bb = https://bitbucket.org/
  bb+ssh = ssh://hg@bitbucket.org/
  gcode = https://{1}.googlecode.com/hg/
  kiln = https://{1}.kilnhg.com/Repo/

You can override a predefined scheme by defining a new scheme with the
same name.
"""

import os
import re

from mercurial.i18n import _
from mercurial import (
    error,
    extensions,
    hg,
    pycompat,
    registrar,
    templater,
)
from mercurial.utils import (
    urlutil,
)

cmdtable = {}
command = registrar.command(cmdtable)
# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
# be specifying the version(s) of Mercurial they are tested with, or
# leave the attribute unspecified.
testedwith = b'ships-with-hg-core'

_partre = re.compile(br'{(\d+)\}')


class ShortRepository:
    def __init__(self, url, scheme, templater):
        self.scheme = scheme
        self.templater = templater
        self.url = url
        try:
            self.parts = max(map(int, _partre.findall(self.url)))
        except ValueError:
            self.parts = 0

    def __repr__(self):
        return b'<ShortRepository: %s>' % self.scheme

    def make_peer(self, ui, path, *args, **kwargs):
        new_url = self.resolve(path.rawloc)
        path = path.copy(new_raw_location=new_url)
        cls = hg.peer_schemes.get(path.url.scheme)
        if cls is not None:
            return cls.make_peer(ui, path, *args, **kwargs)
        return None

    def instance(self, ui, url, create, intents=None, createopts=None):
        url = self.resolve(url)
        u = urlutil.url(url)
        scheme = u.scheme or b'file'
        if scheme in hg.peer_schemes:
            cls = hg.peer_schemes[scheme]
        elif scheme in hg.repo_schemes:
            cls = hg.repo_schemes[scheme]
        else:
            cls = hg.LocalFactory
        return cls.instance(
            ui, url, create, intents=intents, createopts=createopts
        )

    def resolve(self, url):
        # Should this use the urlutil.url class, or is manual parsing better?
        try:
            url = url.split(b'://', 1)[1]
        except IndexError:
            raise error.Abort(_(b"no '://' in scheme url '%s'") % url)
        parts = url.split(b'/', self.parts)
        if len(parts) > self.parts:
            tail = parts[-1]
            parts = parts[:-1]
        else:
            tail = b''
        context = {b'%d' % (i + 1): v for i, v in enumerate(parts)}
        return b''.join(self.templater.process(self.url, context)) + tail


def hasdriveletter(orig, path):
    if path:
        for scheme in schemes:
            if path.startswith(scheme + b':'):
                return False
    return orig(path)


schemes = {
    b'py': b'http://hg.python.org/',
    b'bb': b'https://bitbucket.org/',
    b'bb+ssh': b'ssh://hg@bitbucket.org/',
    b'gcode': b'https://{1}.googlecode.com/hg/',
    b'kiln': b'https://{1}.kilnhg.com/Repo/',
}


def _check_drive_letter(scheme: bytes) -> None:
    """check if a scheme conflict with a Windows drive letter"""
    if (
        pycompat.iswindows
        and len(scheme) == 1
        and scheme.isalpha()
        and os.path.exists(b'%s:\\' % scheme)
    ):
        msg = _(b'custom scheme %s:// conflicts with drive letter %s:\\\n')
        msg %= (scheme, scheme.upper())
        raise error.Abort(msg)


def extsetup(ui):
    schemes.update(dict(ui.configitems(b'schemes')))
    t = templater.engine(templater.parse)
    for scheme, url in schemes.items():
        _check_drive_letter(scheme)
        url_scheme = urlutil.url(url).scheme
        if url_scheme in hg.peer_schemes:
            hg.peer_schemes[scheme] = ShortRepository(url, scheme, t)
        else:
            hg.repo_schemes[scheme] = ShortRepository(url, scheme, t)

    extensions.wrapfunction(urlutil, 'hasdriveletter', hasdriveletter)


@command(b'debugexpandscheme', norepo=True)
def expandscheme(ui, url, **opts):
    """given a repo path, provide the scheme-expanded path"""
    scheme = urlutil.url(url).scheme
    if scheme in hg.peer_schemes:
        cls = hg.peer_schemes[scheme]
    else:
        cls = hg.repo_schemes.get(scheme)
    if cls is not None and isinstance(cls, ShortRepository):
        url = cls.resolve(url)
    ui.write(url + b'\n')