revset: stop serializing node when using "%ln"
Turning hundreds of thousands of nodes from node to hex and back can be slow… how
about we stop doing it?
In many cases where we are using node ids we should be using revision ids. However,
this is not a good reason to have a stupidly slow implementation of "%ln".
This caught my attention again because the phase discovery during push makes
extensive use of "%ln" on huge sets. In absolute terms, that phase discovery
probably should use "%ld" and needs to improve its algorithmic complexity, but
improving "%ln" seems simple and long overdue. This greatly speeds up `hg push`
on repositories with many drafts.
Here are some relevant poulpe benchmarks:
### data-env-vars.name = mozilla-try-2023-03-22-zstd-sparse-revlog
# benchmark.name = hg.command.push
# bin-env-vars.hg.flavor = default
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.explicit-rev = all-out-heads
# benchmark.variants.issue6528 = disabled
# benchmark.variants.protocol = ssh
# benchmark.variants.reuse-external-delta-parent = default
## benchmark.variants.revs = any-1-extra-rev
before: 44.235070
after: 20.416329 (-53.85%, -23.82)
## benchmark.variants.revs = any-100-extra-rev
before: 49.234697
after: 26.519829 (-46.14%, -22.71)
### benchmark.name = hg.command.bundle
# bin-env-vars.hg.flavor = default
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.revs = all
# benchmark.variants.type = none-streamv2
## data-env-vars.name = heptapod-public-2024-03-25-zstd-sparse-revlog
before: 10.138396
after: 7.750458 (-23.55%, -2.39)
## data-env-vars.name = mercurial-public-2024-03-22-zstd-sparse-revlog
before: 1.263859
after: 0.700229 (-44.60%, -0.56)
## data-env-vars.name = mozilla-try-2023-03-22-zstd-sparse-revlog
before: 399.484481
after: 346.5089 (-13.26%, -52.98)
## data-env-vars.name = pypy-2024-03-22-zstd-sparse-revlog
before: 4.540080
after: 3.401700 (-25.07%, -1.14)
## data-env-vars.name = tryton-public-2024-03-22-zstd-sparse-revlog
before: 2.975765
after: 1.870798 (-37.13%, -1.10)
#!/usr/bin/env python2
import argparse
import os
import subprocess
import sys
# Put the library directory of the hg found on $PATH at the front of
# sys.path, so the mercurial import that follows resolves against it.
try:
    hglib = subprocess.check_output(['hg', 'debuginstall', '-T', '{hgmodules}'])
except subprocess.CalledProcessError:
    # We're probably running with a PyOxidized Mercurial, so just
    # proceed and hope it works out okay.
    pass
else:
    sys.path.insert(0, os.path.dirname(hglib))
from mercurial import util
# Command-line interface: two optional switches plus the mandatory path of
# the __version__.py file to read.
ap = argparse.ArgumentParser()
ap.add_argument(
    '--paranoid',
    help="Be paranoid about how version numbers compare and produce "
    "something that's more likely to sort reasonably.",
    action='store_true',
)
ap.add_argument(
    '--selftest',
    help='Run self-tests.',
    action='store_true',
)
ap.add_argument(
    'versionfile',
    help='Path to a valid mercurial __version__.py',
)
def paranoidver(ver):
    """Given an hg version produce something that distutils can sort.

    Some Mac package management systems use distutils code in order to
    figure out upgrades, which makes life difficult. The test case is
    a reduced version of code in the Munki tool used by some large
    organizations to centrally manage OS X packages, which is what
    inspired this kludge.

    ``ver`` is split with ``util.versiontuple(ver, n=4)``. The returned
    string always has three numeric components, followed by the extra
    part of the version when there is one:

    * a missing micro component becomes ``0``;
    * an extra part starting with ``rc`` is re-expressed as micro
      ``9999`` of the preceding minor version (``<major - 1>.9`` when
      the minor is 0) and appended after a ``-``;
    * any other extra part is appended after a ``+``.

    >>> paranoidver('3.4')
    '3.4.0'
    >>> paranoidver('3.4.2')
    '3.4.2'
    >>> paranoidver('3.0-rc+10')
    '2.9.9999-rc+10'
    >>> paranoidver('4.2+483-5d44d7d4076e')
    '4.2.0+483-5d44d7d4076e'
    >>> paranoidver('4.2.1+598-48d1e1214d8c')
    '4.2.1+598-48d1e1214d8c'
    >>> paranoidver('4.3-rc')
    '4.2.9999-rc'
    >>> paranoidver('4.3')
    '4.3.0'
    >>> from distutils import version
    >>> class LossyPaddedVersion(version.LooseVersion):
    ...     '''Subclass version.LooseVersion to compare things like
    ...     "10.6" and "10.6.0" as equal'''
    ...     def __init__(self, s):
    ...         self.parse(s)
    ...
    ...     def _pad(self, version_list, max_length):
    ...         'Pad a version list by adding extra 0 components to the end'
    ...         # copy the version_list so we don't modify it
    ...         cmp_list = list(version_list)
    ...         while len(cmp_list) < max_length:
    ...             cmp_list.append(0)
    ...         return cmp_list
    ...
    ...     def __cmp__(self, other):
    ...         if isinstance(other, str):
    ...             other = LossyPaddedVersion(other)
    ...         max_length = max(len(self.version), len(other.version))
    ...         self_cmp_version = self._pad(self.version, max_length)
    ...         other_cmp_version = self._pad(other.version, max_length)
    ...         return cmp(self_cmp_version, other_cmp_version)
    >>> def testver(older, newer):
    ...   o = LossyPaddedVersion(paranoidver(older))
    ...   n = LossyPaddedVersion(paranoidver(newer))
    ...   return o < n
    >>> testver('3.4', '3.5')
    True
    >>> testver('3.4.0', '3.5-rc')
    True
    >>> testver('3.4-rc', '3.5')
    True
    >>> testver('3.4-rc+10-deadbeef', '3.5')
    True
    >>> testver('3.4.2', '3.5-rc')
    True
    >>> testver('3.4.2', '3.5-rc+10-deadbeef')
    True
    >>> testver('4.2+483-5d44d7d4076e', '4.2.1+598-48d1e1214d8c')
    True
    >>> testver('4.3-rc', '4.3')
    True
    >>> testver('4.3', '4.3-rc')
    False
    """
    major, minor, micro, extra = util.versiontuple(ver, n=4)
    if micro is None:
        # Always emit three numeric components ('3.4' -> '3.4.0').
        micro = 0
    if extra:
        if extra.startswith('rc'):
            # A release candidate has to sort before its final release, so
            # present it as a very late micro release of the previous
            # minor version instead (4.3-rc -> 4.2.9999-rc).
            if minor == 0:
                # There is no minor below .0: step back to <major - 1>.9.
                major -= 1
                minor = 9
            else:
                minor -= 1
            micro = 9999
            extra = '-' + extra
        else:
            extra = '+' + extra
    else:
        # Normalise a falsy extra part so the '%s' below adds nothing.
        extra = ''
    return '%d.%d.%d%s' % (major, minor, micro, extra)
def main(argv):
    """Print the version recorded in a Mercurial ``__version__.py`` file.

    ``argv`` is the complete argument vector; its first element (the
    program name) is skipped before the rest is parsed with ``ap``.

    With ``--selftest`` the doctests of this module are run and nothing
    else is done. Otherwise the file given as ``versionfile`` is scanned
    for the first line starting with ``version = b`` and the quoted value
    on that line is printed, passed through ``paranoidver()`` first when
    ``--paranoid`` was given.

    If the file contains no such line the problem is reported through
    ``ap.error()``, which prints a usage message and exits.
    """
    opts = ap.parse_args(argv[1:])
    if opts.selftest:
        import doctest

        doctest.testmod()
        return
    prefix = 'version = b'
    ver = None
    with open(opts.versionfile) as f:
        for l in f:
            if l.startswith(prefix):
                # The version is the quoted literal after the prefix. Strip
                # trailing whitespace first (the last line of a file may
                # lack its newline), then drop the opening quote and the
                # closing quote.
                ver = l.rstrip()[len(prefix) + 1 : -1]
                break
    if ver is None:
        # Nothing to print: report it as a usage error instead of tripping
        # over an unbound name further down.
        ap.error('no "version = b..." line found in %s' % opts.versionfile)
    if opts.paranoid:
        print(paranoidver(ver))
    else:
        print(ver)
# Script entry point: main() receives the full argument vector, program
# name included.
if __name__ == '__main__':
    main(argv=sys.argv)