contrib: add pull_logger extension
authorpacien <pacien.trangirard@pacien.net>
Mon, 25 Jul 2022 22:47:15 +0200
changeset 49508 791050360486
parent 49506 44bc045a43ca
child 49509 946c023212b8
contrib: add pull_logger extension This extension logs the pull parameters, i.e. the remote and common heads, when pulling from the local repository. The collected data should give an idea of the state of a pair of repositories and allow replaying past synchronisations between them. This is particularly useful for working on data exchange, bundling and caching-related optimisations.
contrib/pull_logger.py
tests/test-contrib-pull-logger.t
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/pull_logger.py	Mon Jul 25 22:47:15 2022 +0200
@@ -0,0 +1,129 @@
+# pull_logger.py - Logs pulls to a JSON-line file in the repo's VFS.
+#
+# Copyright 2022  Pacien TRAN-GIRARD <pacien.trangirard@pacien.net>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+
+'''logs pull parameters to a file
+
+This extension logs the pull parameters, i.e. the remote and common heads,
+when pulling from the local repository.
+
+The collected data should give an idea of the state of a pair of repositories
+and allow replaying past synchronisations between them. This is particularly
+useful for working on data exchange, bundling and caching-related
+optimisations.
+
+The record is a JSON-line file located in the repository's VFS at
+.hg/pull_log.jsonl.
+
+Log write failures are not considered fatal: log writes may be skipped for any
+reason such as insufficient storage or a timeout.
+
+The timeouts of the exclusive lock used when writing to the lock file can be
+configured through the 'timeout.lock' and 'timeout.warn' options of this
+plugin. Those are not expected to be held for a significant time in practice.::
+
+  [pull-logger]
+  timeout.lock = 300
+  timeout.warn = 100
+
+Note: there is no automatic log rotation and the size of the log is not capped.
+'''
+
+
+import json
+import time
+
+from mercurial.i18n import _
+from mercurial.utils import stringutil
+from mercurial import (
+    error,
+    extensions,
+    lock,
+    registrar,
+    wireprotov1server,
+)
+
+EXT_NAME = b'pull-logger'
+EXT_VERSION_CODE = 0
+
+LOG_FILE = b'pull_log.jsonl'
+LOCK_NAME = LOG_FILE + b'.lock'
+
+configtable = {}
+configitem = registrar.configitem(configtable)
+configitem(EXT_NAME, b'timeout.lock', default=600)
+configitem(EXT_NAME, b'timeout.warn', default=120)
+
+
+def wrap_getbundle(orig, repo, proto, others, *args, **kwargs):
+    heads, common = extract_pull_heads(others)
+    log_entry = {
+        'timestamp': time.time(),
+        'logger_version': EXT_VERSION_CODE,
+        'heads': sorted(heads),
+        'common': sorted(common),
+    }
+
+    try:
+        write_to_log(repo, log_entry)
+    except (IOError, error.LockError) as err:
+        msg = stringutil.forcebytestr(err)
+        repo.ui.warn(_(b'unable to append to pull log: %s\n') % msg)
+
+    return orig(repo, proto, others, *args, **kwargs)
+
+
+def extract_pull_heads(bundle_args):
+    opts = wireprotov1server.options(
+        b'getbundle',
+        wireprotov1server.wireprototypes.GETBUNDLE_ARGUMENTS.keys(),
+        bundle_args.copy(),  # this call consumes the args destructively
+    )
+
+    heads = opts.get(b'heads', b'').decode('utf-8').split(' ')
+    common = opts.get(b'common', b'').decode('utf-8').split(' ')
+    return (heads, common)
+
+
+def write_to_log(repo, entry):
+    locktimeout = repo.ui.configint(EXT_NAME, b'timeout.lock')
+    lockwarntimeout = repo.ui.configint(EXT_NAME, b'timeout.warn')
+
+    with lock.trylock(
+        ui=repo.ui,
+        vfs=repo.vfs,
+        lockname=LOCK_NAME,
+        timeout=locktimeout,
+        warntimeout=lockwarntimeout,
+    ):
+        with repo.vfs.open(LOG_FILE, b'a+') as logfile:
+            serialised = json.dumps(entry, sort_keys=True)
+            logfile.write(serialised.encode('utf-8'))
+            logfile.write(b'\n')
+            logfile.flush()
+
+
+def reposetup(ui, repo):
+    if repo.local():
+        repo._wlockfreeprefix.add(LOG_FILE)
+
+
+def uisetup(ui):
+    del wireprotov1server.commands[b'getbundle']
+    decorator = wireprotov1server.wireprotocommand(
+        name=b'getbundle',
+        args=b'*',
+        permission=b'pull',
+    )
+
+    extensions.wrapfunction(
+        container=wireprotov1server,
+        funcname='getbundle',
+        wrapper=wrap_getbundle,
+    )
+
+    decorator(wireprotov1server.getbundle)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-contrib-pull-logger.t	Mon Jul 25 22:47:15 2022 +0200
@@ -0,0 +1,61 @@
+Check that the pull logger plugins logs pulls
+=============================================
+
+Enable the extension
+
+  $ echo "[extensions]" >> $HGRCPATH
+  $ echo "pull-logger = $TESTDIR/../contrib/pull_logger.py" >> $HGRCPATH
+
+
+Check the format of the generated log entries, with a bunch of elements in the
+common and heads set
+
+  $ hg init server
+  $ hg -R server debugbuilddag '.*2+2'
+  $ hg clone ssh://user@dummy/server client --rev 0
+  adding changesets
+  adding manifests
+  adding file changes
+  added 1 changesets with 0 changes to 0 files
+  new changesets 1ea73414a91b
+  updating to branch default
+  0 files updated, 0 files merged, 0 files removed, 0 files unresolved
+  $ tail -1 server/.hg/pull_log.jsonl
+  {"common": ["0000000000000000000000000000000000000000"], "heads": ["1ea73414a91b0920940797d8fc6a11e447f8ea1e"], "logger_version": 0, "timestamp": *} (glob)
+  $ hg -R client pull --rev 1 --rev 2
+  pulling from ssh://user@dummy/server
+  searching for changes
+  adding changesets
+  adding manifests
+  adding file changes
+  added 2 changesets with 0 changes to 0 files (+1 heads)
+  new changesets d8736c3a2c84:fa28e81e283b
+  (run 'hg heads' to see heads, 'hg merge' to merge)
+  $ tail -1 server/.hg/pull_log.jsonl
+  {"common": ["1ea73414a91b0920940797d8fc6a11e447f8ea1e"], "heads": ["d8736c3a2c84ee759a2821385804bcb67f266ade", "fa28e81e283b3416de4d48ee0dd2d446e9e38d7c"], "logger_version": 0, "timestamp": *} (glob)
+  $ hg -R client pull --rev 2 --rev 3
+  pulling from ssh://user@dummy/server
+  searching for changes
+  adding changesets
+  adding manifests
+  adding file changes
+  added 1 changesets with 0 changes to 0 files
+  new changesets 944641ddcaef
+  (run 'hg update' to get a working copy)
+  $ tail -1 server/.hg/pull_log.jsonl
+  {"common": ["1ea73414a91b0920940797d8fc6a11e447f8ea1e", "fa28e81e283b3416de4d48ee0dd2d446e9e38d7c"], "heads": ["944641ddcaef174df7ce1bc2751a5f165129778b", "fa28e81e283b3416de4d48ee0dd2d446e9e38d7c"], "logger_version": 0, "timestamp": *} (glob)
+
+
+Check the number of entries generated in the log when pulling from multiple
+clients at the same time
+
+  $ rm -f server/.hg/pull_log.jsonl
+  $ for i in $($TESTDIR/seq.py 32); do
+  >   hg clone ssh://user@dummy/server client_$i --rev 0
+  > done > /dev/null
+  $ for i in $($TESTDIR/seq.py 32); do
+  >   hg -R client_$i pull --rev 1 &
+  > done > /dev/null
+  $ wait
+  $ wc -l server/.hg/pull_log.jsonl
+  \s*64 .* (re)