# HG changeset patch
# User Bryan O'Sullivan
# Date 1361388694 28800
# Node ID 9955fc5ee24ba0916a63dbaef9458a7d9e0d110d
# Parent 86524a70c0f6272917dc03ed0b258917c81f4419
worker: handle worker failures more aggressively

We now wait for worker processes in a separate thread, so that we can
spot failures in a timely way, without waiting for the progress pipe
to drain.

If a worker fails, we recover the pre-parallel-update behaviour of
failing early by killing its peers before propagating the failure.

diff -r 86524a70c0f6 -r 9955fc5ee24b mercurial/worker.py
--- a/mercurial/worker.py Wed Feb 20 11:31:31 2013 -0800
+++ b/mercurial/worker.py Wed Feb 20 11:31:34 2013 -0800
@@ -6,7 +6,7 @@
 # GNU General Public License version 2 or any later version.
 
 from i18n import _
-import os, signal, sys, util
+import errno, os, signal, sys, threading, util
 
 def countcpus():
     '''try to count the number of CPUs on the system'''
@@ -77,6 +77,7 @@
     workers = _numworkers(ui)
     oldhandler = signal.getsignal(signal.SIGINT)
     signal.signal(signal.SIGINT, signal.SIG_IGN)
+    pids, problem = [], [0]
     for pargs in partition(args, workers):
         pid = os.fork()
         if pid == 0:
@@ -88,26 +89,41 @@
                 os._exit(0)
             except KeyboardInterrupt:
                 os._exit(255)
+        pids.append(pid)
+    pids.reverse()
     os.close(wfd)
     fp = os.fdopen(rfd, 'rb', 0)
+    def killworkers():
+        # if one worker bails, there's no good reason to wait for the rest
+        for p in pids:
+            try:
+                os.kill(p, signal.SIGTERM)
+            except OSError, err:
+                if err.errno != errno.ESRCH:
+                    raise
+    def waitforworkers():
+        for _ in pids:
+            st = _exitstatus(os.wait()[1])
+            # record only the first failure, then stop the other workers
+            if st and not problem[0]:
+                problem[0] = st
+                killworkers()
+    t = threading.Thread(target=waitforworkers)
+    t.start()
     def cleanup():
-        # python 2.4 is too dumb for try/yield/finally
         signal.signal(signal.SIGINT, oldhandler)
-        problem = None
-        for i in xrange(workers):
-            pid, st = os.wait()
-            st = _exitstatus(st)
-            if st and not problem:
-                problem = st
-        if problem:
-            if problem < 0:
-                os.kill(os.getpid(), -problem)
-            sys.exit(problem)
+        t.join()
+        status = problem[0]
+        if status:
+            if status < 0:
+                os.kill(os.getpid(), -status)
+            sys.exit(status)
     try:
         for line in fp:
             l = line.split(' ', 1)
             yield int(l[0]), l[1][:-1]
     except: # re-raises
+        killworkers()
         cleanup()
         raise
     cleanup()