clone-bundles: add a basic first version of automatic bundle generation
authorPierre-Yves David <pierre-yves.david@octobus.net>
Mon, 13 Mar 2023 17:34:18 +0100
changeset 50403 5ae30ff79c76
parent 50402 771294224bf6
child 50404 971dc2369b04
clone-bundles: add a basic first version of automatic bundle generation See inline documentation for details.
hgext/clonebundles.py
tests/test-clonebundles-autogen.t
--- a/hgext/clonebundles.py	Fri Apr 14 10:41:40 2023 +0200
+++ b/hgext/clonebundles.py	Mon Mar 13 17:34:18 2023 +0100
@@ -200,15 +200,72 @@
 occurs. So server operators should prepare for some people to follow these
 instructions when a failure occurs, thus driving more load to the original
 Mercurial server when the bundle hosting service fails.
+
+
+auto-generation of clone bundles
+--------------------------------
+
+It is possible to set Mercurial to automatically re-generate clone bundles when
+new content is available.
+
+Mercurial will take care of the process asynchronously. The defined list of
+bundle type will be generated, uploaded, and advertised.
+
+Bundles Generation:
+...................
+
+The extension can generate multiple variants of the clone bundle. Each
+different variant will be defined by the "bundle-spec" they use::
+
+    [clone-bundles]
+    auto-generate.formats= zstd-v2, gzip-v2
+
+See `hg help bundlespec` for details about available options.
+
+Bundles Upload and Serving:
+...........................
+
+The generated bundles need to be made available to users through a "public" URL.
+This should be donne through `clone-bundles.upload-command` configuration. The
+value of this command should be a shell command. It will have access to the
+bundle file path through the `$HGCB_BUNDLE_PATH` variable. And the expected
+basename in the "public" URL is accessible at::
+
+  [clone-bundles]
+  upload-command=sftp put $HGCB_BUNDLE_PATH \
+      sftp://bundles.host/clone-bundles/$HGCB_BUNDLE_BASENAME
+
+After upload, the file should be available at an url defined by
+`clone-bundles.url-template`.
+
+  [clone-bundles]
+  url-template=https://bundles.host/cache/clone-bundles/{basename}
 """
 
 
+import os
+import weakref
+
+from mercurial.i18n import _
+
 from mercurial import (
     bundlecaches,
+    commands,
+    error,
     extensions,
+    localrepo,
+    lock,
+    node,
+    registrar,
+    util,
     wireprotov1server,
 )
 
+
+from mercurial.utils import (
+    procutil,
+)
+
 testedwith = b'ships-with-hg-core'
 
 
@@ -226,3 +283,550 @@
 
 def extsetup(ui):
     extensions.wrapfunction(wireprotov1server, b'_capabilities', capabilities)
+
+
+# logic for bundle auto-generation
+
+
+configtable = {}
+configitem = registrar.configitem(configtable)
+
+cmdtable = {}
+command = registrar.command(cmdtable)
+
+configitem(b'clone-bundles', b'auto-generate.formats', default=list)
+
+
+configitem(b'clone-bundles', b'upload-command', default=None)
+
+configitem(b'clone-bundles', b'url-template', default=None)
+
+configitem(b'devel', b'debug.clonebundles', default=False)
+
+
+# category for the post-close transaction hooks
+CAT_POSTCLOSE = b"clonebundles-autobundles"
+
+# template for bundle file names
+BUNDLE_MASK = (
+    b"full-%(bundle_type)s-%(revs)d_revs-%(tip_short)s_tip-%(op_id)s.hg"
+)
+
+
+# file in .hg/ use to track clonebundles being auto-generated
+AUTO_GEN_FILE = b'clonebundles.auto-gen'
+
+
+class BundleBase(object):
+    """represents the core of properties that matters for us in a bundle
+
+    :bundle_type: the bundlespec (see hg help bundlespec)
+    :revs:        the number of revisions in the repo at bundle creation time
+    :tip_rev:     the rev-num of the tip revision
+    :tip_node:    the node id of the tip-most revision in the bundle
+
+    :ready:       True if the bundle is ready to be served
+    """
+
+    ready = False
+
+    def __init__(self, bundle_type, revs, tip_rev, tip_node):
+        self.bundle_type = bundle_type
+        self.revs = revs
+        self.tip_rev = tip_rev
+        self.tip_node = tip_node
+
+    def valid_for(self, repo):
+        """is this bundle applicable to the current repository
+
+        This is useful for detecting bundles made irrelevant by stripping.
+        """
+        tip_node = node.bin(self.tip_node)
+        return repo.changelog.index.get_rev(tip_node) == self.tip_rev
+
+    def __eq__(self, other):
+        left = (self.ready, self.bundle_type, self.tip_rev, self.tip_node)
+        right = (other.ready, other.bundle_type, other.tip_rev, other.tip_node)
+        return left == right
+
+    def __neq__(self, other):
+        return not self == other
+
+    def __cmp__(self, other):
+        if self == other:
+            return 0
+        return -1
+
+
+class RequestedBundle(BundleBase):
+    """A bundle that should be generated.
+
+    Additional attributes compared to BundleBase
+    :heads:       list of head revisions (as rev-num)
+    :op_id:       a "unique" identifier for the operation triggering the change
+    """
+
+    def __init__(self, bundle_type, revs, tip_rev, tip_node, head_revs, op_id):
+        self.head_revs = head_revs
+        self.op_id = op_id
+        super(RequestedBundle, self).__init__(
+            bundle_type,
+            revs,
+            tip_rev,
+            tip_node,
+        )
+
+    @property
+    def suggested_filename(self):
+        """A filename that can be used for the generated bundle"""
+        data = {
+            b'bundle_type': self.bundle_type,
+            b'revs': self.revs,
+            b'heads': self.head_revs,
+            b'tip_rev': self.tip_rev,
+            b'tip_node': self.tip_node,
+            b'tip_short': self.tip_node[:12],
+            b'op_id': self.op_id,
+        }
+        return BUNDLE_MASK % data
+
+    def generate_bundle(self, repo, file_path):
+        """generate the bundle at `filepath`"""
+        commands.bundle(
+            repo.ui,
+            repo,
+            file_path,
+            base=[b"null"],
+            rev=self.head_revs,
+            type=self.bundle_type,
+            quiet=True,
+        )
+
+    def generating(self, file_path, hostname=None, pid=None):
+        """return a GeneratingBundle object from this object"""
+        if pid is None:
+            pid = os.getpid()
+        if hostname is None:
+            hostname = lock._getlockprefix()
+        return GeneratingBundle(
+            self.bundle_type,
+            self.revs,
+            self.tip_rev,
+            self.tip_node,
+            hostname,
+            pid,
+            file_path,
+        )
+
+
+class GeneratingBundle(BundleBase):
+    """A bundle being generated
+
+    extra attributes compared to BundleBase:
+
+    :hostname: the hostname of the machine generating the bundle
+    :pid:      the pid of the process generating the bundle
+    :filepath: the target filename of the bundle
+
+    These attributes exist to help detect stalled generation processes.
+    """
+
+    ready = False
+
+    def __init__(
+        self, bundle_type, revs, tip_rev, tip_node, hostname, pid, filepath
+    ):
+        self.hostname = hostname
+        self.pid = pid
+        self.filepath = filepath
+        super(GeneratingBundle, self).__init__(
+            bundle_type, revs, tip_rev, tip_node
+        )
+
+    @classmethod
+    def from_line(cls, line):
+        """create an object by deserializing a line from AUTO_GEN_FILE"""
+        assert line.startswith(b'PENDING-v1 ')
+        (
+            __,
+            bundle_type,
+            revs,
+            tip_rev,
+            tip_node,
+            hostname,
+            pid,
+            filepath,
+        ) = line.split()
+        hostname = util.urlreq.unquote(hostname)
+        filepath = util.urlreq.unquote(filepath)
+        revs = int(revs)
+        tip_rev = int(tip_rev)
+        pid = int(pid)
+        return cls(
+            bundle_type, revs, tip_rev, tip_node, hostname, pid, filepath
+        )
+
+    def to_line(self):
+        """serialize the object to include as a line in AUTO_GEN_FILE"""
+        templ = b"PENDING-v1 %s %d %d %s %s %d %s"
+        data = (
+            self.bundle_type,
+            self.revs,
+            self.tip_rev,
+            self.tip_node,
+            util.urlreq.quote(self.hostname),
+            self.pid,
+            util.urlreq.quote(self.filepath),
+        )
+        return templ % data
+
+    def __eq__(self, other):
+        if not super(GeneratingBundle, self).__eq__(other):
+            return False
+        left = (self.hostname, self.pid, self.filepath)
+        right = (other.hostname, other.pid, other.filepath)
+        return left == right
+
+    def uploaded(self, url, basename):
+        """return a GeneratedBundle from this object"""
+        return GeneratedBundle(
+            self.bundle_type,
+            self.revs,
+            self.tip_rev,
+            self.tip_node,
+            url,
+            basename,
+        )
+
+
+class GeneratedBundle(BundleBase):
+    """A bundle that is done being generated and can be served
+
+    extra attributes compared to BundleBase:
+
+    :file_url: the url where the bundle is available.
+    :basename: the "basename" used to upload (useful for deletion)
+
+    These attributes exist to generate a bundle manifest
+    (.hg/pullbundles.manifest)
+    """
+
+    ready = True
+
+    def __init__(
+        self, bundle_type, revs, tip_rev, tip_node, file_url, basename
+    ):
+        self.file_url = file_url
+        self.basename = basename
+        super(GeneratedBundle, self).__init__(
+            bundle_type, revs, tip_rev, tip_node
+        )
+
+    @classmethod
+    def from_line(cls, line):
+        """create an object by deserializing a line from AUTO_GEN_FILE"""
+        assert line.startswith(b'DONE-v1 ')
+        (
+            __,
+            bundle_type,
+            revs,
+            tip_rev,
+            tip_node,
+            file_url,
+            basename,
+        ) = line.split()
+        revs = int(revs)
+        tip_rev = int(tip_rev)
+        file_url = util.urlreq.unquote(file_url)
+        return cls(bundle_type, revs, tip_rev, tip_node, file_url, basename)
+
+    def to_line(self):
+        """serialize the object to include as a line in AUTO_GEN_FILE"""
+        templ = b"DONE-v1 %s %d %d %s %s %s"
+        data = (
+            self.bundle_type,
+            self.revs,
+            self.tip_rev,
+            self.tip_node,
+            util.urlreq.quote(self.file_url),
+            self.basename,
+        )
+        return templ % data
+
+    def manifest_line(self):
+        """serialize the object to include as a line in pullbundles.manifest"""
+        templ = b"%s BUNDLESPEC=%s REQUIRESNI=true"
+        return templ % (self.file_url, self.bundle_type)
+
+    def __eq__(self, other):
+        if not super(GeneratedBundle, self).__eq__(other):
+            return False
+        return self.file_url == other.file_url
+
+
+def parse_auto_gen(content):
+    """parse the AUTO_GEN_FILE to return a list of Bundle object"""
+    bundles = []
+    for line in content.splitlines():
+        if line.startswith(b'PENDING-v1 '):
+            bundles.append(GeneratingBundle.from_line(line))
+        elif line.startswith(b'DONE-v1 '):
+            bundles.append(GeneratedBundle.from_line(line))
+    return bundles
+
+
+def dumps_auto_gen(bundles):
+    """serialize a list of Bundle as a AUTO_GEN_FILE content"""
+    lines = []
+    for b in bundles:
+        lines.append(b"%s\n" % b.to_line())
+    lines.sort()
+    return b"".join(lines)
+
+
+def read_auto_gen(repo):
+    """read the AUTO_GEN_FILE for the <repo> a list of Bundle object"""
+    data = repo.vfs.tryread(AUTO_GEN_FILE)
+    if not data:
+        return []
+    return parse_auto_gen(data)
+
+
+def write_auto_gen(repo, bundles):
+    """write a list of Bundle objects into the repo's AUTO_GEN_FILE"""
+    assert repo._cb_lock_ref is not None
+    data = dumps_auto_gen(bundles)
+    with repo.vfs(AUTO_GEN_FILE, mode=b'wb', atomictemp=True) as f:
+        f.write(data)
+
+
+def generate_manifest(bundles):
+    """write a list of Bundle objects into the repo's AUTO_GEN_FILE"""
+    bundles = list(bundles)
+    bundles.sort(key=lambda b: b.bundle_type)
+    lines = []
+    for b in bundles:
+        lines.append(b"%s\n" % b.manifest_line())
+    return b"".join(lines)
+
+
+def update_ondisk_manifest(repo):
+    """update the clonebundle manifest with latest url"""
+    with repo.clonebundles_lock():
+        bundles = read_auto_gen(repo)
+
+        per_types = {}
+        for b in bundles:
+            if not (b.ready and b.valid_for(repo)):
+                continue
+            current = per_types.get(b.bundle_type)
+            if current is not None and current.revs >= b.revs:
+                continue
+            per_types[b.bundle_type] = b
+        manifest = generate_manifest(per_types.values())
+        with repo.vfs(
+            bundlecaches.CB_MANIFEST_FILE, mode=b"wb", atomictemp=True
+        ) as f:
+            f.write(manifest)
+
+
+def update_bundle_list(repo, new_bundles=(), del_bundles=()):
+    """modify the repo's AUTO_GEN_FILE
+
+    This method also regenerates the clone bundle manifest when needed"""
+    with repo.clonebundles_lock():
+        bundles = read_auto_gen(repo)
+        if del_bundles:
+            bundles = [b for b in bundles if b not in del_bundles]
+        new_bundles = [b for b in new_bundles if b not in bundles]
+        bundles.extend(new_bundles)
+        write_auto_gen(repo, bundles)
+        all_changed = []
+        all_changed.extend(new_bundles)
+        all_changed.extend(del_bundles)
+        if any(b.ready for b in all_changed):
+            update_ondisk_manifest(repo)
+
+
+def cleanup_tmp_bundle(repo, target):
+    """remove a GeneratingBundle file and entry"""
+    assert not target.ready
+    with repo.clonebundles_lock():
+        repo.vfs.tryunlink(target.filepath)
+        update_bundle_list(repo, del_bundles=[target])
+
+
+def finalize_one_bundle(repo, target):
+    """upload a generated bundle and advertise it in the clonebundles.manifest"""
+    with repo.clonebundles_lock():
+        bundles = read_auto_gen(repo)
+        if target in bundles and target.valid_for(repo):
+            result = upload_bundle(repo, target)
+            update_bundle_list(repo, new_bundles=[result])
+    cleanup_tmp_bundle(repo, target)
+
+
+def upload_bundle(repo, bundle):
+    """upload the result of a GeneratingBundle and return a GeneratedBundle
+
+    The upload is done using the `clone-bundles.upload-command`
+    """
+    cmd = repo.ui.config(b'clone-bundles', b'upload-command')
+    url = repo.ui.config(b'clone-bundles', b'url-template')
+    basename = repo.vfs.basename(bundle.filepath)
+    filepath = procutil.shellquote(bundle.filepath)
+    variables = {
+        b'HGCB_BUNDLE_PATH': filepath,
+        b'HGCB_BUNDLE_BASENAME': basename,
+    }
+    env = procutil.shellenviron(environ=variables)
+    ret = repo.ui.system(cmd, environ=env)
+    if ret:
+        raise error.Abort(b"command returned status %d: %s" % (ret, cmd))
+    url = (
+        url.decode('utf8')
+        .format(basename=basename.decode('utf8'))
+        .encode('utf8')
+    )
+    return bundle.uploaded(url, basename)
+
+
+def auto_bundle_needed_actions(repo, bundles, op_id):
+    """find the list of bundles that need action
+
+    returns a list of RequestedBundle objects that need to be generated and
+    uploaded."""
+    create_bundles = []
+    repo = repo.filtered(b"immutable")
+    targets = repo.ui.configlist(b'clone-bundles', b'auto-generate.formats')
+    revs = len(repo.changelog)
+    generic_data = {
+        'revs': revs,
+        'head_revs': repo.changelog.headrevs(),
+        'tip_rev': repo.changelog.tiprev(),
+        'tip_node': node.hex(repo.changelog.tip()),
+        'op_id': op_id,
+    }
+    for t in targets:
+        data = generic_data.copy()
+        data['bundle_type'] = t
+        b = RequestedBundle(**data)
+        create_bundles.append(b)
+    return create_bundles
+
+
+def start_one_bundle(repo, bundle):
+    """start the generation of a single bundle file
+
+    the `bundle` argument should be a RequestedBundle object.
+
+    This data is passed to the `debugmakeclonebundles` "as is".
+    """
+    data = util.pickle.dumps(bundle)
+    cmd = [procutil.hgexecutable(), b'--cwd', repo.path, INTERNAL_CMD]
+    env = procutil.shellenviron()
+    msg = b'clone-bundles: starting bundle generation: %s\n'
+    stdout = None
+    stderr = None
+    waits = []
+    record_wait = None
+    if repo.ui.configbool(b'devel', b'debug.clonebundles'):
+        stdout = procutil.stdout
+        stderr = procutil.stderr
+        repo.ui.write(msg % bundle.bundle_type)
+        record_wait = waits.append
+    else:
+        repo.ui.debug(msg % bundle.bundle_type)
+    bg = procutil.runbgcommand
+    bg(
+        cmd,
+        env,
+        stdin_bytes=data,
+        stdout=stdout,
+        stderr=stderr,
+        record_wait=record_wait,
+    )
+    for f in waits:
+        f()
+
+
+INTERNAL_CMD = b'debug::internal-make-clone-bundles'
+
+
+@command(INTERNAL_CMD, [], b'')
+def debugmakeclonebundles(ui, repo):
+    """Internal command to auto-generate debug bundles"""
+    requested_bundle = util.pickle.load(procutil.stdin)
+    procutil.stdin.close()
+
+    fname = requested_bundle.suggested_filename
+    fpath = repo.vfs.makedirs(b'tmp-bundles')
+    fpath = repo.vfs.join(b'tmp-bundles', fname)
+    bundle = requested_bundle.generating(fpath)
+    update_bundle_list(repo, new_bundles=[bundle])
+
+    requested_bundle.generate_bundle(repo, fpath)
+
+    repo.invalidate()
+    finalize_one_bundle(repo, bundle)
+
+
+def make_auto_bundler(source_repo):
+    reporef = weakref.ref(source_repo)
+
+    def autobundle(tr):
+        repo = reporef()
+        assert repo is not None
+        bundles = read_auto_gen(repo)
+        new = auto_bundle_needed_actions(repo, bundles, b"%d_txn" % id(tr))
+        for data in new:
+            start_one_bundle(repo, data)
+        return None
+
+    return autobundle
+
+
+def reposetup(ui, repo):
+    """install the two pieces needed for automatic clonebundle generation
+
+    - add a "post-close" hook that fires bundling when needed
+    - introduce a clone-bundle lock to let multiple processes meddle with the
+      state files.
+    """
+    if not repo.local():
+        return
+
+    class autobundlesrepo(repo.__class__):
+        def transaction(self, *args, **kwargs):
+            tr = super(autobundlesrepo, self).transaction(*args, **kwargs)
+            targets = repo.ui.configlist(
+                b'clone-bundles', b'auto-generate.formats'
+            )
+            if targets:
+                tr.addpostclose(CAT_POSTCLOSE, make_auto_bundler(self))
+            return tr
+
+        @localrepo.unfilteredmethod
+        def clonebundles_lock(self, wait=True):
+            '''Lock the repository file related to clone bundles'''
+            if not util.safehasattr(self, '_cb_lock_ref'):
+                self._cb_lock_ref = None
+            l = self._currentlock(self._cb_lock_ref)
+            if l is not None:
+                l.lock()
+                return l
+
+            l = self._lock(
+                vfs=self.vfs,
+                lockname=b"clonebundleslock",
+                wait=wait,
+                releasefn=None,
+                acquirefn=None,
+                desc=_(b'repository %s') % self.origroot,
+            )
+            self._cb_lock_ref = weakref.ref(l)
+            return l
+
+    repo._wlockfreeprefix.add(AUTO_GEN_FILE)
+    repo._wlockfreeprefix.add(bundlecaches.CB_MANIFEST_FILE)
+    repo.__class__ = autobundlesrepo
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-clonebundles-autogen.t	Mon Mar 13 17:34:18 2023 +0100
@@ -0,0 +1,70 @@
+
+#require no-reposimplestore no-chg
+
+initial setup
+
+  $ hg init server
+  $ cat >> server/.hg/hgrc << EOF
+  > [extensions]
+  > clonebundles =
+  > 
+  > [clone-bundles]
+  > auto-generate.formats = v2
+  > upload-command = cp "\$HGCB_BUNDLE_PATH" "$TESTTMP"/final-upload/
+  > url-template = file://$TESTTMP/final-upload/{basename}
+  > 
+  > [devel]
+  > debug.clonebundles=yes
+  > EOF
+
+  $ mkdir final-upload
+  $ hg clone server client
+  updating to branch default
+  0 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  $ cd client
+
+Test bundles are generated on push
+==================================
+
+  $ touch foo
+  $ hg -q commit -A -m 'add foo'
+  $ touch bar
+  $ hg -q commit -A -m 'add bar'
+  $ hg push
+  pushing to $TESTTMP/server
+  searching for changes
+  adding changesets
+  adding manifests
+  adding file changes
+  2 changesets found
+  added 2 changesets with 2 changes to 2 files
+  clone-bundles: starting bundle generation: v2
+  $ cat ../server/.hg/clonebundles.manifest
+  file:/*/$TESTTMP/final-upload/full-v2-2_revs-aaff8d2ffbbf_tip-*_txn.hg BUNDLESPEC=v2 REQUIRESNI=true (glob)
+  $ ls -1 ../final-upload
+  full-v2-2_revs-aaff8d2ffbbf_tip-*_txn.hg (glob)
+  $ ls -1 ../server/.hg/tmp-bundles
+
+Newer bundles are generated with more pushes
+--------------------------------------------
+
+  $ touch baz
+  $ hg -q commit -A -m 'add baz'
+  $ touch buz
+  $ hg -q commit -A -m 'add buz'
+  $ hg push
+  pushing to $TESTTMP/server
+  searching for changes
+  adding changesets
+  adding manifests
+  adding file changes
+  4 changesets found
+  added 2 changesets with 2 changes to 2 files
+  clone-bundles: starting bundle generation: v2
+
+  $ cat ../server/.hg/clonebundles.manifest
+  file:/*/$TESTTMP/final-upload/full-v2-4_revs-6427147b985a_tip-*_txn.hg BUNDLESPEC=v2 REQUIRESNI=true (glob)
+  $ ls -1 ../final-upload
+  full-v2-2_revs-aaff8d2ffbbf_tip-*_txn.hg (glob)
+  full-v2-4_revs-6427147b985a_tip-*_txn.hg (glob)
+  $ ls -1 ../server/.hg/tmp-bundles