# HG changeset patch # User Pierre-Yves David # Date 1711459966 0 # Node ID 463e63aa547c48b8f891fcc9a1b3f00ceb376ee9 # Parent a452807df09b5b07e0c5830ee1b9271f4f1df81e stream-clone: disable gc for `_entries_walk` duration The number of small containers created sends Python into a gc-frenzy that seriously impacts performance. This change significantly boosts performance. The following numbers come from a large private repository using perf::stream-locked-section: base-line: 35.04 seconds prev-change: 24.51 seconds (-30%) this-change: 20.88 seconds (-40% from baseline; -15% from previous changes) diff -r a452807df09b -r 463e63aa547c mercurial/streamclone.py --- a/mercurial/streamclone.py Tue Mar 26 13:28:52 2024 +0000 +++ b/mercurial/streamclone.py Tue Mar 26 13:32:46 2024 +0000 @@ -770,23 +770,26 @@ matcher = narrowspec.match(repo.root, includes, excludes) phase = not repo.publishing() - entries = _walkstreamfiles( - repo, - matcher, - phase=phase, - obsolescence=includeobsmarkers, - ) - for entry in entries: - yield (_srcstore, entry) + # Python is getting crazy at all the small container we creates, disabling + # the gc while we do so helps performance a lot. + with util.nogc(): + entries = _walkstreamfiles( + repo, + matcher, + phase=phase, + obsolescence=includeobsmarkers, + ) + for entry in entries: + yield (_srcstore, entry) - for name in cacheutil.cachetocopy(repo): - if repo.cachevfs.exists(name): - # not really a StoreEntry, but close enough - entry = store.SimpleStoreEntry( - entry_path=name, - is_volatile=True, - ) - yield (_srccache, entry) + for name in cacheutil.cachetocopy(repo): + if repo.cachevfs.exists(name): + # not really a StoreEntry, but close enough + entry = store.SimpleStoreEntry( + entry_path=name, + is_volatile=True, + ) + yield (_srccache, entry) def generatev2(repo, includes, excludes, includeobsmarkers):