# HG changeset patch # User Pierre-Yves David # Date 1668525923 -3600 # Node ID 9cac281eb9c001e4c8a21404ed9fba07651ab2af # Parent 78ba41878f2e558b771f74809aa6646eb16e1677 debug: add an option to display statistic about a bundling operation This will helps a lot to understand how the bundling decision might impact pull/unbundle on the other side. diff -r 78ba41878f2e -r 9cac281eb9c0 hgext/remotefilelog/remotefilelog.py --- a/hgext/remotefilelog/remotefilelog.py Tue Nov 15 18:08:56 2022 +0100 +++ b/hgext/remotefilelog/remotefilelog.py Tue Nov 15 16:25:23 2022 +0100 @@ -299,6 +299,7 @@ deltaprevious=False, deltamode=None, sidedata_helpers=None, + debug_info=None, ): # we don't use any of these parameters here del nodesorder, revisiondata, assumehaveparentrevisions, deltaprevious diff -r 78ba41878f2e -r 9cac281eb9c0 hgext/sqlitestore.py --- a/hgext/sqlitestore.py Tue Nov 15 18:08:56 2022 +0100 +++ b/hgext/sqlitestore.py Tue Nov 15 16:25:23 2022 +0100 @@ -589,6 +589,7 @@ assumehaveparentrevisions=False, deltamode=repository.CG_DELTAMODE_STD, sidedata_helpers=None, + debug_info=None, ): if nodesorder not in (b'nodes', b'storage', b'linear', None): raise error.ProgrammingError( diff -r 78ba41878f2e -r 9cac281eb9c0 mercurial/changegroup.py --- a/mercurial/changegroup.py Tue Nov 15 18:08:56 2022 +0100 +++ b/mercurial/changegroup.py Tue Nov 15 16:25:23 2022 +0100 @@ -869,6 +869,7 @@ fullclnodes=None, precomputedellipsis=None, sidedata_helpers=None, + debug_info=None, ): """Calculate deltas for a set of revisions. @@ -978,6 +979,7 @@ assumehaveparentrevisions=not ellipses, deltamode=deltamode, sidedata_helpers=sidedata_helpers, + debug_info=debug_info, ) for i, revision in enumerate(revisions): @@ -1003,6 +1005,187 @@ progress.complete() +def make_debug_info(): + """ "build a "new" debug_info dictionnary + + That dictionnary can be used to gather information about the bundle process + """ + return { + 'revision-total': 0, + 'revision-changelog': 0, + 'revision-manifest': 0, + 'revision-files': 0, + 'file-count': 0, + 'merge-total': 0, + 'available-delta': 0, + 'available-full': 0, + 'delta-against-prev': 0, + 'delta-full': 0, + 'delta-against-p1': 0, + 'denied-delta-candeltafn': 0, + 'denied-base-not-available': 0, + 'reused-storage-delta': 0, + 'computed-delta': 0, + } + + +def merge_debug_info(base, other): + """merge the debug information from into + + This function can be used to gather lower level information into higher level ones. + """ + for key in ( + 'revision-total', + 'revision-changelog', + 'revision-manifest', + 'revision-files', + 'merge-total', + 'available-delta', + 'available-full', + 'delta-against-prev', + 'delta-full', + 'delta-against-p1', + 'denied-delta-candeltafn', + 'denied-base-not-available', + 'reused-storage-delta', + 'computed-delta', + ): + base[key] += other[key] + + +_KEY_PART_WIDTH = 17 + + +def _dbg_bdl_line( + ui, + indent, + key, + base_value=None, + percentage_base=None, + percentage_key=None, + percentage_ref=None, + extra=None, +): + """Print one line of debug_bundle_debug_info""" + line = b"DEBUG-BUNDLING: " + line += b' ' * (2 * indent) + key += b":" + if base_value is not None: + assert len(key) + 1 + (2 * indent) <= _KEY_PART_WIDTH + line += key.ljust(_KEY_PART_WIDTH - (2 * indent)) + line += b"%10d" % base_value + else: + line += key + + if percentage_base is not None: + assert base_value is not None + percentage = base_value * 100 // percentage_base + if percentage_key is not None: + line += b" (%d%% of %s %d)" % ( + percentage, + percentage_key, + percentage_ref, + ) + else: + line += b" (%d%%)" % percentage + + if extra: + line += b" " + line += extra + + line += b'\n' + ui.write_err(line) + + +def display_bundling_debug_info( + ui, + debug_info, + cl_debug_info, + mn_debug_info, + fl_debug_info, +): + """display debug information gathered during a bundling through `ui`""" + d = debug_info + c = cl_debug_info + m = mn_debug_info + f = fl_debug_info + all_info = [ + (b"changelog", b"cl", c), + (b"manifests", b"mn", m), + (b"files", b"fl", f), + ] + _dbg_bdl_line(ui, 0, b'revisions', d['revision-total']) + _dbg_bdl_line(ui, 1, b'changelog', d['revision-changelog']) + _dbg_bdl_line(ui, 1, b'manifest', d['revision-manifest']) + extra = b'(for %d revlogs)' % d['file-count'] + _dbg_bdl_line(ui, 1, b'files', d['revision-files'], extra=extra) + if d['merge-total']: + _dbg_bdl_line(ui, 1, b'merge', d['merge-total'], d['revision-total']) + for k, __, v in all_info: + if v['merge-total']: + _dbg_bdl_line(ui, 2, k, v['merge-total'], v['revision-total']) + + _dbg_bdl_line(ui, 0, b'deltas') + _dbg_bdl_line( + ui, + 1, + b'from-storage', + d['reused-storage-delta'], + percentage_base=d['available-delta'], + percentage_key=b"available", + percentage_ref=d['available-delta'], + ) + + if d['denied-delta-candeltafn']: + _dbg_bdl_line(ui, 2, b'denied-fn', d['denied-delta-candeltafn']) + for __, k, v in all_info: + if v['denied-delta-candeltafn']: + _dbg_bdl_line(ui, 3, k, v['denied-delta-candeltafn']) + + if d['denied-base-not-available']: + _dbg_bdl_line(ui, 2, b'denied-nb', d['denied-base-not-available']) + for k, __, v in all_info: + if v['denied-base-not-available']: + _dbg_bdl_line(ui, 3, k, v['denied-base-not-available']) + + if d['computed-delta']: + _dbg_bdl_line(ui, 1, b'computed', d['computed-delta']) + + if d['available-full']: + _dbg_bdl_line( + ui, + 2, + b'full', + d['delta-full'], + percentage_base=d['available-full'], + percentage_key=b"native", + percentage_ref=d['available-full'], + ) + for k, __, v in all_info: + if v['available-full']: + _dbg_bdl_line( + ui, + 3, + k, + v['delta-full'], + percentage_base=v['available-full'], + percentage_key=b"native", + percentage_ref=v['available-full'], + ) + + if d['delta-against-prev']: + _dbg_bdl_line(ui, 2, b'previous', d['delta-against-prev']) + for k, __, v in all_info: + if v['delta-against-prev']: + _dbg_bdl_line(ui, 3, k, v['delta-against-prev']) + + if d['delta-against-p1']: + _dbg_bdl_line(ui, 2, b'parent-1', d['delta-against-prev']) + for k, __, v in all_info: + if v['delta-against-p1']: + _dbg_bdl_line(ui, 3, k, v['delta-against-p1']) + + class cgpacker: def __init__( self, @@ -1086,13 +1269,21 @@ self._verbosenote = lambda s: None def generate( - self, commonrevs, clnodes, fastpathlinkrev, source, changelog=True + self, + commonrevs, + clnodes, + fastpathlinkrev, + source, + changelog=True, ): """Yield a sequence of changegroup byte chunks. If changelog is False, changelog data won't be added to changegroup """ + debug_info = None repo = self._repo + if repo.ui.configbool(b'debug', b'bundling-stats'): + debug_info = make_debug_info() cl = repo.changelog self._verbosenote(_(b'uncompressed size of bundle content:\n')) @@ -1107,14 +1298,19 @@ # correctly advertise its sidedata categories directly. remote_sidedata = repo._wanted_sidedata sidedata_helpers = sidedatamod.get_sidedata_helpers( - repo, remote_sidedata + repo, + remote_sidedata, ) + cl_debug_info = None + if debug_info is not None: + cl_debug_info = make_debug_info() clstate, deltas = self._generatechangelog( cl, clnodes, generate=changelog, sidedata_helpers=sidedata_helpers, + debug_info=cl_debug_info, ) for delta in deltas: for chunk in _revisiondeltatochunks( @@ -1126,6 +1322,9 @@ close = closechunk() size += len(close) yield closechunk() + if debug_info is not None: + merge_debug_info(debug_info, cl_debug_info) + debug_info['revision-changelog'] = cl_debug_info['revision-total'] self._verbosenote(_(b'%8.i (changelog)\n') % size) @@ -1133,6 +1332,9 @@ manifests = clstate[b'manifests'] changedfiles = clstate[b'changedfiles'] + if debug_info is not None: + debug_info['file-count'] = len(changedfiles) + # We need to make sure that the linkrev in the changegroup refers to # the first changeset that introduced the manifest or file revision. # The fastpath is usually safer than the slowpath, because the filelogs @@ -1156,6 +1358,9 @@ fnodes = {} # needed file nodes size = 0 + mn_debug_info = None + if debug_info is not None: + mn_debug_info = make_debug_info() it = self.generatemanifests( commonrevs, clrevorder, @@ -1165,6 +1370,7 @@ source, clstate[b'clrevtomanifestrev'], sidedata_helpers=sidedata_helpers, + debug_info=mn_debug_info, ) for tree, deltas in it: @@ -1185,6 +1391,9 @@ close = closechunk() size += len(close) yield close + if debug_info is not None: + merge_debug_info(debug_info, mn_debug_info) + debug_info['revision-manifest'] = mn_debug_info['revision-total'] self._verbosenote(_(b'%8.i (manifests)\n') % size) yield self._manifestsend @@ -1199,6 +1408,9 @@ manifests.clear() clrevs = {cl.rev(x) for x in clnodes} + fl_debug_info = None + if debug_info is not None: + fl_debug_info = make_debug_info() it = self.generatefiles( changedfiles, commonrevs, @@ -1208,6 +1420,7 @@ fnodes, clrevs, sidedata_helpers=sidedata_helpers, + debug_info=fl_debug_info, ) for path, deltas in it: @@ -1230,12 +1443,29 @@ self._verbosenote(_(b'%8.i %s\n') % (size, path)) yield closechunk() + if debug_info is not None: + merge_debug_info(debug_info, fl_debug_info) + debug_info['revision-files'] = fl_debug_info['revision-total'] + + if debug_info is not None: + display_bundling_debug_info( + repo.ui, + debug_info, + cl_debug_info, + mn_debug_info, + fl_debug_info, + ) if clnodes: repo.hook(b'outgoing', node=hex(clnodes[0]), source=source) def _generatechangelog( - self, cl, nodes, generate=True, sidedata_helpers=None + self, + cl, + nodes, + generate=True, + sidedata_helpers=None, + debug_info=None, ): """Generate data for changelog chunks. @@ -1332,6 +1562,7 @@ fullclnodes=self._fullclnodes, precomputedellipsis=self._precomputedellipsis, sidedata_helpers=sidedata_helpers, + debug_info=debug_info, ) return state, gen @@ -1346,6 +1577,7 @@ source, clrevtolocalrev, sidedata_helpers=None, + debug_info=None, ): """Returns an iterator of changegroup chunks containing manifests. @@ -1444,6 +1676,7 @@ fullclnodes=self._fullclnodes, precomputedellipsis=self._precomputedellipsis, sidedata_helpers=sidedata_helpers, + debug_info=debug_info, ) if not self._oldmatcher.visitdir(store.tree[:-1]): @@ -1483,6 +1716,7 @@ fnodes, clrevs, sidedata_helpers=None, + debug_info=None, ): changedfiles = [ f @@ -1578,6 +1812,7 @@ fullclnodes=self._fullclnodes, precomputedellipsis=self._precomputedellipsis, sidedata_helpers=sidedata_helpers, + debug_info=debug_info, ) yield fname, deltas @@ -1867,7 +2102,12 @@ def makechangegroup( - repo, outgoing, version, source, fastpath=False, bundlecaps=None + repo, + outgoing, + version, + source, + fastpath=False, + bundlecaps=None, ): cgstream = makestream( repo, @@ -1917,7 +2157,12 @@ repo.hook(b'preoutgoing', throw=True, source=source) _changegroupinfo(repo, csets, source) - return bundler.generate(commonrevs, csets, fastpathlinkrev, source) + return bundler.generate( + commonrevs, + csets, + fastpathlinkrev, + source, + ) def _addchangegroupfiles( diff -r 78ba41878f2e -r 9cac281eb9c0 mercurial/configitems.py --- a/mercurial/configitems.py Tue Nov 15 18:08:56 2022 +0100 +++ b/mercurial/configitems.py Tue Nov 15 16:25:23 2022 +0100 @@ -588,6 +588,12 @@ b'revlog.debug-delta', default=False, ) +# display extra information about the bundling process +coreconfigitem( + b'debug', + b'bundling-stats', + default=False, +) coreconfigitem( b'defaults', b'.*', diff -r 78ba41878f2e -r 9cac281eb9c0 mercurial/filelog.py --- a/mercurial/filelog.py Tue Nov 15 18:08:56 2022 +0100 +++ b/mercurial/filelog.py Tue Nov 15 16:25:23 2022 +0100 @@ -111,6 +111,7 @@ assumehaveparentrevisions=False, deltamode=repository.CG_DELTAMODE_STD, sidedata_helpers=None, + debug_info=None, ): return self._revlog.emitrevisions( nodes, @@ -119,6 +120,7 @@ assumehaveparentrevisions=assumehaveparentrevisions, deltamode=deltamode, sidedata_helpers=sidedata_helpers, + debug_info=debug_info, ) def addrevision( diff -r 78ba41878f2e -r 9cac281eb9c0 mercurial/manifest.py --- a/mercurial/manifest.py Tue Nov 15 18:08:56 2022 +0100 +++ b/mercurial/manifest.py Tue Nov 15 16:25:23 2022 +0100 @@ -1836,6 +1836,7 @@ assumehaveparentrevisions=False, deltamode=repository.CG_DELTAMODE_STD, sidedata_helpers=None, + debug_info=None, ): return self._revlog.emitrevisions( nodes, @@ -1844,6 +1845,7 @@ assumehaveparentrevisions=assumehaveparentrevisions, deltamode=deltamode, sidedata_helpers=sidedata_helpers, + debug_info=debug_info, ) def addgroup( diff -r 78ba41878f2e -r 9cac281eb9c0 mercurial/revlog.py --- a/mercurial/revlog.py Tue Nov 15 18:08:56 2022 +0100 +++ b/mercurial/revlog.py Tue Nov 15 16:25:23 2022 +0100 @@ -2889,6 +2889,7 @@ assumehaveparentrevisions=False, deltamode=repository.CG_DELTAMODE_STD, sidedata_helpers=None, + debug_info=None, ): if nodesorder not in (b'nodes', b'storage', b'linear', None): raise error.ProgrammingError( @@ -2918,6 +2919,7 @@ revisiondata=revisiondata, assumehaveparentrevisions=assumehaveparentrevisions, sidedata_helpers=sidedata_helpers, + debug_info=debug_info, ) DELTAREUSEALWAYS = b'always' diff -r 78ba41878f2e -r 9cac281eb9c0 mercurial/utils/storageutil.py --- a/mercurial/utils/storageutil.py Tue Nov 15 18:08:56 2022 +0100 +++ b/mercurial/utils/storageutil.py Tue Nov 15 16:25:23 2022 +0100 @@ -305,6 +305,7 @@ revisiondata=False, assumehaveparentrevisions=False, sidedata_helpers=None, + debug_info=None, ): """Generic implementation of ifiledata.emitrevisions(). @@ -370,6 +371,10 @@ ``sidedata_helpers`` (optional) If not None, means that sidedata should be included. See `revlogutil.sidedata.get_sidedata_helpers`. + + ``debug_info` + An optionnal dictionnary to gather information about the bundling + process (if present, see config: debug.bundling.stats. """ fnode = store.node @@ -395,23 +400,43 @@ if rev == nullrev: continue + debug_delta_source = None + if debug_info is not None: + debug_info['revision-total'] += 1 + node = fnode(rev) p1rev, p2rev = store.parentrevs(rev) + if debug_info is not None: + if p1rev != p2rev and p1rev != nullrev and p2rev != nullrev: + debug_info['merge-total'] += 1 + if deltaparentfn: deltaparentrev = deltaparentfn(rev) + if debug_info is not None: + if deltaparentrev == nullrev: + debug_info['available-full'] += 1 + else: + debug_info['available-delta'] += 1 + else: deltaparentrev = nullrev # Forced delta against previous mode. if deltamode == repository.CG_DELTAMODE_PREV: + if debug_info is not None: + debug_delta_source = "prev" baserev = prevrev # We're instructed to send fulltext. Honor that. elif deltamode == repository.CG_DELTAMODE_FULL: + if debug_info is not None: + debug_delta_source = "full" baserev = nullrev # We're instructed to use p1. Honor that elif deltamode == repository.CG_DELTAMODE_P1: + if debug_info is not None: + debug_delta_source = "p1" baserev = p1rev # There is a delta in storage. We try to use that because it @@ -421,20 +446,29 @@ # Base revision was already emitted in this group. We can # always safely use the delta. if deltaparentrev in available: + if debug_info is not None: + debug_delta_source = "storage" baserev = deltaparentrev # Base revision is a parent that hasn't been emitted already. # Use it if we can assume the receiver has the parent revision. elif assumehaveparentrevisions and deltaparentrev in (p1rev, p2rev): + if debug_info is not None: + debug_delta_source = "storage" baserev = deltaparentrev - # No guarantee the receiver has the delta parent. Send delta # against last revision (if possible), which in the common case # should be similar enough to this revision that the delta is # reasonable. elif prevrev is not None: + if debug_info is not None: + debug_info['denied-base-not-available'] += 1 + debug_delta_source = "prev" baserev = prevrev else: + if debug_info is not None: + debug_info['denied-base-not-available'] += 1 + debug_delta_source = "full" baserev = nullrev # Storage has a fulltext revision. @@ -442,13 +476,24 @@ # Let's use the previous revision, which is as good a guess as any. # There is definitely room to improve this logic. elif prevrev is not None: + if debug_info is not None: + debug_delta_source = "prev" baserev = prevrev else: + if debug_info is not None: + debug_delta_source = "full" baserev = nullrev # But we can't actually use our chosen delta base for whatever # reason. Reset to fulltext. - if baserev != nullrev and (candeltafn and not candeltafn(baserev, rev)): + if ( + baserev != nullrev + and candeltafn is not None + and not candeltafn(baserev, rev) + ): + if debug_info is not None: + debug_delta_source = "full" + debug_info['denied-delta-candeltafn'] += 1 baserev = nullrev revision = None @@ -460,6 +505,9 @@ try: revision = store.rawdata(node) except error.CensoredNodeError as e: + if debug_info is not None: + debug_delta_source = "full" + debug_info['denied-delta-not-available'] += 1 revision = e.tombstone if baserev != nullrev: @@ -471,12 +519,46 @@ elif ( baserev == nullrev and deltamode != repository.CG_DELTAMODE_PREV ): + if debug_info is not None: + debug_info['computed-delta'] += 1 # close enough + debug_info['delta-full'] += 1 revision = store.rawdata(node) available.add(rev) else: if revdifffn: + if debug_info is not None: + if debug_delta_source == "full": + debug_info['computed-delta'] += 1 + debug_info['delta-full'] += 1 + elif debug_delta_source == "prev": + debug_info['computed-delta'] += 1 + debug_info['delta-against-prev'] += 1 + elif debug_delta_source == "p1": + debug_info['computed-delta'] += 1 + debug_info['delta-against-p1'] += 1 + elif debug_delta_source == "storage": + debug_info['reused-storage-delta'] += 1 + else: + assert False, 'unreachable' + delta = revdifffn(baserev, rev) else: + if debug_info is not None: + if debug_delta_source == "full": + debug_info['computed-delta'] += 1 + debug_info['delta-full'] += 1 + elif debug_delta_source == "prev": + debug_info['computed-delta'] += 1 + debug_info['delta-against-prev'] += 1 + elif debug_delta_source == "p1": + debug_info['computed-delta'] += 1 + debug_info['delta-against-p1'] += 1 + elif debug_delta_source == "storage": + # seem quite unlikelry to happens + debug_info['computed-delta'] += 1 + debug_info['reused-storage-delta'] += 1 + else: + assert False, 'unreachable' delta = mdiff.textdiff( store.rawdata(baserev), store.rawdata(rev) ) diff -r 78ba41878f2e -r 9cac281eb9c0 tests/test-bundle.t --- a/tests/test-bundle.t Tue Nov 15 18:08:56 2022 +0100 +++ b/tests/test-bundle.t Tue Nov 15 16:25:23 2022 +0100 @@ -1039,6 +1039,24 @@ $ hg bundle -a --config devel.bundle.delta=full ./full.hg 3 changesets found + +Test the debug statistic when building a bundle +----------------------------------------------- + + $ hg bundle -a ./default.hg --config debug.bundling-stats=yes + 3 changesets found + DEBUG-BUNDLING: revisions: 9 + DEBUG-BUNDLING: changelog: 3 + DEBUG-BUNDLING: manifest: 3 + DEBUG-BUNDLING: files: 3 (for 3 revlogs) + DEBUG-BUNDLING: deltas: + DEBUG-BUNDLING: from-storage: 2 (100% of available 2) + DEBUG-BUNDLING: computed: 7 + DEBUG-BUNDLING: full: 7 (100% of native 7) + DEBUG-BUNDLING: changelog: 3 (100% of native 3) + DEBUG-BUNDLING: manifests: 1 (100% of native 1) + DEBUG-BUNDLING: files: 3 (100% of native 3) + Test the debug output when applying delta -----------------------------------------