# HG changeset patch # User Gregory Szorc # Date 1476639532 25200 # Node ID 3e86261bf1109d876e653a9205061301d20fef71 # Parent f7ed5af31242010f6d75709867664108483a0708 exchange: refactor APIs to obtain bundle data (API) Currently, exchange.getbundle() returns either a cg1unpacker or a util.chunkbuffer (in the case of bundle2). This is kinda OK, as both expose a .read() to consumers. However, localpeer.getbundle() has code inferring what the response type is based on arguments and converts the util.chunkbuffer returned in the bundle2 case to a bundle2.unbundle20 instance. This is a sign that the API for exchange.getbundle() is not ideal because it doesn't consistently return an "unbundler" instance. In addition, unbundlers mask the fact that there is an underlying generator of changegroup data. In both cg1 and bundle2, this generator is being fed into a util.chunkbuffer so it can be re-exposed as a file object. util.chunkbuffer is a nice abstraction. However, it should only be used "at the edges." This is because keeping data as a generator is more efficient than converting it to a chunkbuffer, especially if we convert that chunkbuffer back to a generator (as is the case in some code paths currently). This patch refactors exchange.getbundle() into exchange.getbundlechunks(). The new API returns an iterator of chunks instead of a file-like object. Callers of exchange.getbundle() have been updated to use the new API. There is a minor change of behavior in test-getbundle.t. This is because `hg debuggetbundle` isn't defining bundlecaps. As a result, a cg1 data stream and unpacker are being produced. This is getting fed into a new bundle20 instance via bundle2.writebundle(), which uses a backchannel mechanism from changegroup generation to add the "nbchanges" part parameter. I never liked this backchannel mechanism and I plan to remove it someday. `hg bundle` still produces the "nbchanges" part parameter, so there should be no user-visible change of behavior. 
I consider this "regression" a bug in `hg debuggetbundle`. And that bug is captured by an existing "TODO" in the code to use bundle2 capabilities. diff -r f7ed5af31242 -r 3e86261bf110 mercurial/exchange.py --- a/mercurial/exchange.py Thu Oct 13 01:30:14 2016 +0200 +++ b/mercurial/exchange.py Sun Oct 16 10:38:52 2016 -0700 @@ -1532,20 +1532,14 @@ return any(cap.startswith('HG2') for cap in bundlecaps) return False -def getbundle(repo, source, heads=None, common=None, bundlecaps=None, - **kwargs): - """return a full bundle (with potentially multiple kind of parts) +def getbundlechunks(repo, source, heads=None, common=None, bundlecaps=None, + **kwargs): + """Return chunks constituting a bundle's raw data. Could be a bundle HG10 or a bundle HG20 depending on bundlecaps - passed. For now, the bundle can contain only changegroup, but this will - changes when more part type will be available for bundle2. + passed. - This is different from changegroup.getchangegroup that only returns an HG10 - changegroup bundle. They may eventually get reunited in the future when we - have a clearer idea of the API we what to query different data. - - The implementation is at a very early stage and will get massive rework - when the API of bundle is refined. + Returns an iterator over raw chunks (of varying sizes). 
""" usebundle2 = bundle2requested(bundlecaps) # bundle10 case @@ -1557,8 +1551,8 @@ raise ValueError(_('unsupported getbundle arguments: %s') % ', '.join(sorted(kwargs.keys()))) outgoing = _computeoutgoing(repo, heads, common) - return changegroup.getchangegroup(repo, source, outgoing, - bundlecaps=bundlecaps) + bundler = changegroup.getbundler('01', repo, bundlecaps) + return changegroup.getsubsetraw(repo, outgoing, bundler, source) # bundle20 case b2caps = {} @@ -1576,7 +1570,7 @@ func(bundler, repo, source, bundlecaps=bundlecaps, b2caps=b2caps, **kwargs) - return util.chunkbuffer(bundler.getchunks()) + return bundler.getchunks() @getbundle2partsgenerator('changegroup') def _getbundlechangegrouppart(bundler, repo, source, bundlecaps=None, diff -r f7ed5af31242 -r 3e86261bf110 mercurial/localrepo.py --- a/mercurial/localrepo.py Thu Oct 13 01:30:14 2016 +0200 +++ b/mercurial/localrepo.py Sun Oct 16 10:38:52 2016 -0700 @@ -149,14 +149,18 @@ def getbundle(self, source, heads=None, common=None, bundlecaps=None, **kwargs): - cg = exchange.getbundle(self._repo, source, heads=heads, - common=common, bundlecaps=bundlecaps, **kwargs) + chunks = exchange.getbundlechunks(self._repo, source, heads=heads, + common=common, bundlecaps=bundlecaps, + **kwargs) + cb = util.chunkbuffer(chunks) + if bundlecaps is not None and 'HG20' in bundlecaps: # When requesting a bundle2, getbundle returns a stream to make the # wire level function happier. We need to build a proper object # from it in local peer. - cg = bundle2.getunbundler(self.ui, cg) - return cg + return bundle2.getunbundler(self.ui, cb) + else: + return changegroup.getunbundler('01', cb, None) # TODO We might want to move the next two calls into legacypeer and add # unbundle instead. 
diff -r f7ed5af31242 -r 3e86261bf110 mercurial/wireproto.py --- a/mercurial/wireproto.py Thu Oct 13 01:30:14 2016 +0200 +++ b/mercurial/wireproto.py Sun Oct 16 10:38:52 2016 -0700 @@ -772,8 +772,10 @@ if not exchange.bundle2requested(opts.get('bundlecaps')): return ooberror(bundle2required) - cg = exchange.getbundle(repo, 'serve', **opts) - return streamres(proto.groupchunks(cg)) + chunks = exchange.getbundlechunks(repo, 'serve', **opts) + # TODO avoid util.chunkbuffer() here since it is adding overhead to + # what is fundamentally a generator proxying operation. + return streamres(proto.groupchunks(util.chunkbuffer(chunks))) @wireprotocommand('heads') def heads(repo, proto): diff -r f7ed5af31242 -r 3e86261bf110 tests/test-getbundle.t --- a/tests/test-getbundle.t Thu Oct 13 01:30:14 2016 +0200 +++ b/tests/test-getbundle.t Sun Oct 16 10:38:52 2016 -0700 @@ -170,7 +170,7 @@ $ hg debuggetbundle repo bundle -t bundle2 $ hg debugbundle bundle Stream params: {} - changegroup -- "sortdict([('version', '01'), ('nbchanges', '18')])" + changegroup -- "sortdict([('version', '01')])" 7704483d56b2a7b5db54dcee7c62378ac629b348 29a4d1f17bd3f0779ca0525bebb1cfb51067c738 713346a995c363120712aed1aee7e04afd867638