in project/nanoeval/nanoeval/_multiprocessing_utils.py [0:0]
def get_loky_executor(num_processes: int | None = None) -> loky.Executor:
    """Return the shared loky executor, creating it on the first call.

    ``_set_loky_parameters()`` is invoked on every call. The executor itself
    is built only once and stored in the module-level ``_executor_cache``;
    subsequent calls hand back that same object.

    Args:
        num_processes: Passed to ``get_reusable_executor`` as ``max_workers``
            when the executor is first created. Once an executor has been
            cached this argument has no effect, because the cached executor
            is returned unchanged.

    Returns:
        The cached executor.
    """
    global _executor_cache
    _set_loky_parameters()

    # Fast path: an executor was already built by an earlier call.
    if _executor_cache is not None:
        return _executor_cache

    # Why the idle timeout is so large
    # --------------------------------
    # Some eval code creates new processes of its own. These don't seem to get
    # cleaned up properly when loky closes idle executors, probably because we
    # don't use hard_exit. Because of this, it seems like a deadlock can
    # happen where:
    #
    #   1. Loky requests termination of the executor due to idle timeout.
    #   2. The executor waits on its subprocesses, due to default code in
    #      multiprocessing.
    #   3. The subprocesses are waiting to get terminated by the atexit
    #      handler in the executor. However, we never get to atexit because
    #      multiprocessing closes subprocesses before calling atexit.
    #
    # For example, see this executor traceback:
    #
    #   Process 73472: /root/.pyenv/versions/3.11.8/bin/python3 -c from multiprocessing.spawn import spawn_main; spawn_main(tracker_fd=23, pipe_handle=41) --multiprocessing-fork
    #   Python v3.11.8 (/root/.pyenv/versions/3.11.8/bin/python3.11)
    #
    #   Thread 73472 (idle): "MainThread"
    #       select (selectors.py:415)
    #       wait (multiprocessing/connection.py:947)
    #       poll (multiprocessing/popen_forkserver.py:65)
    #       wait (multiprocessing/popen_fork.py:43)
    #       join (multiprocessing/process.py:149)
    #       _exit_function (multiprocessing/util.py:360)  # <--- waiting for all subprocesses to exit, doesn't realize they're waiting for it
    #       _bootstrap (multiprocessing/process.py:317)
    #       _main (multiprocessing/spawn.py:135)
    #       spawn_main (multiprocessing/spawn.py:122)
    #       <module> (<string>:1)
    #
    # The stuck executor then blocks the queue of actual executors that want
    # to return results. The workaround is to simply never close executors on
    # idle: with a timeout this large the idle shutdown path is sidestepped,
    # so the deadlock-on-close cannot occur and the eval finishes successfully.
    idle_timeout = 1_000_000

    _executor_cache = get_reusable_executor(
        # Always reuse, even if flags change. (The cast presumably exists to
        # appease the type checker for this argument -- TODO confirm.)
        reuse=cast(Any, True),
        max_workers=num_processes,
        timeout=idle_timeout,
    )
    # Registered only here, i.e. once per executor creation rather than on
    # every call.
    global_exit_stack.callback(_shutdown)
    return _executor_cache