Merge pull request #122 from vibe-d/fix_zombie_processes
Use waitpid to iterate over all exited child processes
This commit is contained in:
commit
bca94d5736
|
@ -55,7 +55,7 @@ final class PosixEventDriver(Loop : PosixEventLoop) : EventDriver {
|
|||
version (linux) alias WatcherDriver = InotifyEventDriverWatchers!EventsDriver;
|
||||
//else version (OSX) alias WatcherDriver = FSEventsEventDriverWatchers!EventsDriver;
|
||||
else alias WatcherDriver = PollEventDriverWatchers!EventsDriver;
|
||||
version (linux) alias ProcessDriver = SignalEventDriverProcesses!Loop;
|
||||
version (Posix) alias ProcessDriver = PosixEventDriverProcesses!Loop;
|
||||
else alias ProcessDriver = DummyEventDriverProcesses!Loop;
|
||||
|
||||
Loop m_loop;
|
||||
|
|
|
@ -10,96 +10,34 @@ import std.algorithm.comparison : among;
|
|||
import std.variant : visit;
|
||||
import std.stdint;
|
||||
|
||||
private struct ProcessInfo {
|
||||
bool exited = true;
|
||||
int exitCode;
|
||||
ProcessWaitCallback[] callbacks;
|
||||
size_t refCount = 0;
|
||||
EventDriverProcesses driver;
|
||||
|
||||
DataInitializer userDataDestructor;
|
||||
ubyte[16*size_t.sizeof] userData;
|
||||
}
|
||||
|
||||
private struct StaticProcesses {
|
||||
@safe: nothrow:
|
||||
import core.sync.mutex : Mutex;
|
||||
|
||||
private {
|
||||
static shared Mutex m_mutex;
|
||||
static __gshared ProcessInfo[ProcessID] m_processes;
|
||||
}
|
||||
|
||||
shared static this()
|
||||
{
|
||||
m_mutex = new shared Mutex;
|
||||
}
|
||||
|
||||
static void add(ProcessID pid, ProcessInfo info) @trusted {
|
||||
m_mutex.lock_nothrow();
|
||||
scope (exit) m_mutex.unlock_nothrow();
|
||||
|
||||
assert(pid !in m_processes, "Process adopted twice");
|
||||
m_processes[pid] = info;
|
||||
}
|
||||
}
|
||||
|
||||
private auto lockedProcessInfo(alias fn)(ProcessID pid) @trusted {
|
||||
StaticProcesses.m_mutex.lock_nothrow();
|
||||
scope (exit) StaticProcesses.m_mutex.unlock_nothrow();
|
||||
auto info = pid in StaticProcesses.m_processes;
|
||||
|
||||
return fn(info);
|
||||
}
|
||||
|
||||
|
||||
private enum SIGCHLD = 17;
|
||||
|
||||
final class SignalEventDriverProcesses(Loop : PosixEventLoop) : EventDriverProcesses {
|
||||
final class PosixEventDriverProcesses(Loop : PosixEventLoop) : EventDriverProcesses {
|
||||
@safe: /*@nogc:*/ nothrow:
|
||||
import core.stdc.errno : errno, EAGAIN, EINPROGRESS;
|
||||
import core.sys.linux.sys.signalfd;
|
||||
import core.sys.posix.unistd : close, read, write, dup;
|
||||
import core.sync.mutex : Mutex;
|
||||
import core.sys.posix.unistd : dup;
|
||||
import core.thread : Thread;
|
||||
|
||||
private {
|
||||
static shared Mutex s_mutex;
|
||||
static __gshared ProcessInfo[ProcessID] s_processes;
|
||||
static __gshared Thread s_waitThread;
|
||||
|
||||
Loop m_loop;
|
||||
// FIXME: avoid virtual function calls and use the final type instead
|
||||
EventDriver m_driver;
|
||||
SignalListenID m_sighandle;
|
||||
}
|
||||
|
||||
this(Loop loop, EventDriver driver)
|
||||
{
|
||||
import core.sys.posix.signal;
|
||||
|
||||
m_loop = loop;
|
||||
m_driver = driver;
|
||||
|
||||
// Listen for child process exits using SIGCHLD
|
||||
m_sighandle = () @trusted {
|
||||
sigset_t sset;
|
||||
sigemptyset(&sset);
|
||||
sigaddset(&sset, SIGCHLD);
|
||||
|
||||
assert(sigprocmask(SIG_BLOCK, &sset, null) == 0);
|
||||
|
||||
return SignalListenID(signalfd(-1, &sset, SFD_NONBLOCK | SFD_CLOEXEC));
|
||||
} ();
|
||||
|
||||
m_loop.initFD(cast(FD)m_sighandle, FDFlags.internal, SignalSlot(null));
|
||||
m_loop.registerFD(cast(FD)m_sighandle, EventMask.read);
|
||||
m_loop.setNotifyCallback!(EventType.read)(cast(FD)m_sighandle, &onSignal);
|
||||
|
||||
onSignal(cast(FD)m_sighandle);
|
||||
}
|
||||
|
||||
void dispose()
|
||||
{
|
||||
FD sighandle = cast(FD)m_sighandle;
|
||||
m_loop.m_fds[sighandle].common.refCount--;
|
||||
m_loop.setNotifyCallback!(EventType.read)(sighandle, null);
|
||||
m_loop.unregisterFD(sighandle, EventMask.read|EventMask.write|EventMask.status);
|
||||
m_loop.clearFD!(SignalSlot)(sighandle);
|
||||
close(cast(int)sighandle);
|
||||
}
|
||||
|
||||
final override ProcessID adopt(int system_pid)
|
||||
|
@ -110,8 +48,7 @@ final class SignalEventDriverProcesses(Loop : PosixEventLoop) : EventDriverProce
|
|||
info.exited = false;
|
||||
info.refCount = 1;
|
||||
info.driver = this;
|
||||
StaticProcesses.add(pid, info);
|
||||
|
||||
add(pid, info);
|
||||
return pid;
|
||||
}
|
||||
|
||||
|
@ -217,6 +154,9 @@ final class SignalEventDriverProcesses(Loop : PosixEventLoop) : EventDriverProce
|
|||
@trusted {
|
||||
import core.sys.posix.signal : pkill = kill;
|
||||
|
||||
assert(cast(int)pid > 0, "Invalid PID passed to kill.");
|
||||
|
||||
if (cast(int)pid > 0)
|
||||
pkill(cast(int)pid, signal);
|
||||
}
|
||||
|
||||
|
@ -225,18 +165,18 @@ final class SignalEventDriverProcesses(Loop : PosixEventLoop) : EventDriverProce
|
|||
bool exited;
|
||||
int exitCode;
|
||||
|
||||
size_t id = lockedProcessInfo!((info) {
|
||||
size_t id = size_t.max;
|
||||
lockedProcessInfo(pid, (info) {
|
||||
assert(info !is null, "Unknown process ID");
|
||||
|
||||
if (info.exited) {
|
||||
exited = true;
|
||||
exitCode = info.exitCode;
|
||||
return 0;
|
||||
} else {
|
||||
info.callbacks ~= on_process_exit;
|
||||
return info.callbacks.length - 1;
|
||||
id = info.callbacks.length - 1;
|
||||
}
|
||||
})(pid);
|
||||
});
|
||||
|
||||
if (exited) {
|
||||
on_process_exit(pid, exitCode);
|
||||
|
@ -245,62 +185,22 @@ final class SignalEventDriverProcesses(Loop : PosixEventLoop) : EventDriverProce
|
|||
return id;
|
||||
}
|
||||
|
||||
final override void cancelWait(ProcessID pid, size_t waitId)
|
||||
final override void cancelWait(ProcessID pid, size_t wait_id)
|
||||
{
|
||||
lockedProcessInfo!((info) {
|
||||
if (wait_id == size_t.max) return;
|
||||
|
||||
lockedProcessInfo(pid, (info) {
|
||||
assert(info !is null, "Unknown process ID");
|
||||
assert(!info.exited, "Cannot cancel wait when none are pending");
|
||||
assert(info.callbacks.length > waitId, "Invalid process wait ID");
|
||||
assert(info.callbacks.length > wait_id, "Invalid process wait ID");
|
||||
|
||||
info.callbacks[waitId] = null;
|
||||
})(pid);
|
||||
info.callbacks[wait_id] = null;
|
||||
});
|
||||
}
|
||||
|
||||
private void onSignal(FD fd)
|
||||
{
|
||||
SignalListenID lid = cast(SignalListenID)fd;
|
||||
|
||||
signalfd_siginfo nfo;
|
||||
do {
|
||||
auto ret = () @trusted { return read(cast(int)fd, &nfo, nfo.sizeof); } ();
|
||||
|
||||
if (ret == -1 && errno.among!(EAGAIN, EINPROGRESS) || ret != nfo.sizeof)
|
||||
return;
|
||||
|
||||
onProcessExit(nfo.ssi_pid, nfo.ssi_status);
|
||||
} while (true);
|
||||
}
|
||||
|
||||
private void onProcessExit(int system_pid, int exitCode)
|
||||
{
|
||||
auto pid = cast(ProcessID)system_pid;
|
||||
|
||||
ProcessWaitCallback[] callbacks;
|
||||
auto driver = lockedProcessInfo!((info) @safe {
|
||||
// We get notified of any child exiting, so ignore the ones we're
|
||||
// not aware of
|
||||
if (info is null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// Increment the ref count to make sure it doesn't get removed
|
||||
info.refCount++;
|
||||
|
||||
info.exited = true;
|
||||
info.exitCode = exitCode;
|
||||
return info.driver;
|
||||
})(pid);
|
||||
|
||||
// Need to call callbacks in the owner thread as this function can be
|
||||
// called from any thread. Without extra threads this is always the main
|
||||
// thread.
|
||||
if (() @trusted { return cast(void*)this == cast(void*)driver; } ()) {
|
||||
onLocalProcessExit(cast(intptr_t)pid);
|
||||
} else if (driver) {
|
||||
auto sharedDriver = () @trusted { return cast(shared typeof(this))driver; } ();
|
||||
|
||||
sharedDriver.m_driver.core.runInOwnerThread(&onLocalProcessExit, cast(intptr_t)pid);
|
||||
}
|
||||
private void onProcessExit(int system_pid)
|
||||
shared {
|
||||
m_driver.core.runInOwnerThread(&onLocalProcessExit, system_pid);
|
||||
}
|
||||
|
||||
private static void onLocalProcessExit(intptr_t system_pid)
|
||||
|
@ -310,7 +210,8 @@ final class SignalEventDriverProcesses(Loop : PosixEventLoop) : EventDriverProce
|
|||
int exitCode;
|
||||
ProcessWaitCallback[] callbacks;
|
||||
|
||||
auto driver = lockedProcessInfo!((info) {
|
||||
PosixEventDriverProcesses driver;
|
||||
lockedProcessInfo(pid, (info) {
|
||||
assert(info !is null);
|
||||
|
||||
exitCode = info.exitCode;
|
||||
|
@ -318,8 +219,8 @@ final class SignalEventDriverProcesses(Loop : PosixEventLoop) : EventDriverProce
|
|||
callbacks = info.callbacks;
|
||||
info.callbacks = null;
|
||||
|
||||
return info.driver;
|
||||
})(pid);
|
||||
driver = info.driver;
|
||||
});
|
||||
|
||||
foreach (cb; callbacks) {
|
||||
if (cb)
|
||||
|
@ -331,53 +232,160 @@ final class SignalEventDriverProcesses(Loop : PosixEventLoop) : EventDriverProce
|
|||
|
||||
final override bool hasExited(ProcessID pid)
|
||||
{
|
||||
return lockedProcessInfo!((info) {
|
||||
bool ret;
|
||||
lockedProcessInfo(pid, (info) {
|
||||
assert(info !is null, "Unknown process ID");
|
||||
|
||||
return info.exited;
|
||||
})(pid);
|
||||
ret = info.exited;
|
||||
});
|
||||
return ret;
|
||||
}
|
||||
|
||||
final override void addRef(ProcessID pid)
|
||||
{
|
||||
lockedProcessInfo!((info) {
|
||||
lockedProcessInfo(pid, (info) {
|
||||
nogc_assert(info.refCount > 0, "Adding reference to unreferenced process FD.");
|
||||
info.refCount++;
|
||||
})(pid);
|
||||
});
|
||||
}
|
||||
|
||||
final override bool releaseRef(ProcessID pid)
|
||||
{
|
||||
return lockedProcessInfo!((info) {
|
||||
bool ret;
|
||||
lockedProcessInfo(pid, (info) {
|
||||
nogc_assert(info.refCount > 0, "Releasing reference to unreferenced process FD.");
|
||||
if (--info.refCount == 0) {
|
||||
// Remove/deallocate process
|
||||
if (info.userDataDestructor)
|
||||
() @trusted { info.userDataDestructor(info.userData.ptr); } ();
|
||||
|
||||
StaticProcesses.m_processes.remove(pid);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
})(pid);
|
||||
() @trusted { s_processes.remove(pid); } ();
|
||||
ret = false;
|
||||
} else ret = true;
|
||||
});
|
||||
return ret;
|
||||
}
|
||||
|
||||
final protected override void* rawUserData(ProcessID pid, size_t size, DataInitializer initialize, DataInitializer destroy)
|
||||
@system {
|
||||
return lockedProcessInfo!((info) {
|
||||
void* ret;
|
||||
lockedProcessInfo(pid, (info) @safe nothrow {
|
||||
assert(info.userDataDestructor is null || info.userDataDestructor is destroy,
|
||||
"Requesting user data with differing type (destructor).");
|
||||
assert(size <= ProcessInfo.userData.length, "Requested user data is too large.");
|
||||
|
||||
if (!info.userDataDestructor) {
|
||||
initialize(info.userData.ptr);
|
||||
() @trusted { initialize(info.userData.ptr); } ();
|
||||
info.userDataDestructor = destroy;
|
||||
}
|
||||
return info.userData.ptr;
|
||||
})(pid);
|
||||
ret = () @trusted { return info.userData.ptr; } ();
|
||||
});
|
||||
return ret;
|
||||
}
|
||||
|
||||
package final @property size_t pendingCount() const nothrow @trusted { return StaticProcesses.m_processes.length; }
|
||||
package final @property size_t pendingCount() const nothrow @trusted { return s_processes.length; }
|
||||
|
||||
|
||||
shared static this()
|
||||
{
|
||||
s_mutex = new shared Mutex;
|
||||
}
|
||||
|
||||
// Runs `fn` while holding `s_mutex`. `fn` receives the result of
// `pid in s_processes`, i.e. a pointer to the stored entry, or null when no
// entry exists for `pid`. The mutex is released when the function is left.
private static void lockedProcessInfo(ProcessID pid, scope void delegate(ProcessInfo*) nothrow @safe fn)
|
||||
{
|
||||
s_mutex.lock_nothrow();
|
||||
scope (exit) s_mutex.unlock_nothrow();
|
||||
// The lookup on the __gshared table is wrapped in a @trusted lambda.
auto info = () @trusted { return pid in s_processes; } ();
|
||||
fn(info);
|
||||
}
|
||||
|
||||
// Stores `info` under `pid` in `s_processes` while holding `s_mutex`.
// If no waiter thread is currently recorded in `s_waitThread`, a new thread
// running `waitForProcesses` is created and started first.
// Asserts that `pid` is not already present in the table.
private static void add(ProcessID pid, ProcessInfo info) @trusted {
|
||||
s_mutex.lock_nothrow();
|
||||
scope (exit) s_mutex.unlock_nothrow();
|
||||
|
||||
// Start the waiter thread on demand; waitForProcesses resets s_waitThread
// to null when it exits its loop.
if (!s_waitThread) {
|
||||
s_waitThread = new Thread(&waitForProcesses);
|
||||
s_waitThread.start();
|
||||
}
|
||||
|
||||
assert(pid !in s_processes, "Process adopted twice");
|
||||
s_processes[pid] = info;
|
||||
}
|
||||
|
||||
// Body of the waiter thread started by `add`.
// Loops: waits for any child state change via waitid(), then polls every
// registered, not-yet-exited process with waitpid(WNOHANG) and forwards each
// one that has terminated to onProcessExitStatic. The loop ends, and
// s_waitThread is reset to null, when waitid() returns -1.
private static void waitForProcesses()
|
||||
@system {
|
||||
import core.sys.posix.sys.wait : idtype_t, WNOHANG, WNOWAIT, WEXITED, WEXITSTATUS, WIFEXITED, WTERMSIG, waitid, waitpid;
|
||||
import core.sys.posix.signal : siginfo_t;
|
||||
|
||||
while (true) {
|
||||
// The siginfo result is not inspected; waitid is only used to wait for
// an exited child. WNOWAIT is passed so the child stays waitable for the
// waitpid() calls below.
siginfo_t dummy;
|
||||
auto ret = waitid(idtype_t.P_ALL, -1, &dummy, WEXITED|WNOWAIT);
|
||||
if (ret == -1) {
|
||||
// waitid failed: clear the thread handle under the lock so that a later
// add() starts a fresh waiter thread, then leave the loop.
{
|
||||
s_mutex.lock_nothrow();
|
||||
scope (exit) s_mutex.unlock_nothrow();
|
||||
s_waitThread = null;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
ProcessID[] allprocs;
|
||||
|
||||
// Snapshot the IDs of all entries not yet marked as exited while holding
// the lock; waitpid is called afterwards without the lock held.
{
|
||||
s_mutex.lock_nothrow();
|
||||
scope (exit) s_mutex.unlock_nothrow();
|
||||
|
||||
|
||||
() @trusted {
|
||||
foreach (ref entry; s_processes.byKeyValue) {
|
||||
if (!entry.value.exited)
|
||||
allprocs ~= entry.key;
|
||||
}
|
||||
} ();
|
||||
}
|
||||
|
||||
// NOTE(review): only PIDs present in s_processes are reaped here. If the
// child reported by waitid() is not in the table, it appears to remain
// waitable (WNOWAIT) and the outer loop may spin - confirm.
foreach (pid; allprocs) {
|
||||
int status;
|
||||
ret = () @trusted { return waitpid(cast(int)pid, &status, WNOHANG); } ();
|
||||
if (ret == cast(int)pid) {
|
||||
// Normal exit yields the exit code; otherwise the negated number of the
// terminating signal is reported.
int exitstatus = WIFEXITED(status) ? WEXITSTATUS(status) : -WTERMSIG(status);
|
||||
onProcessExitStatic(ret, exitstatus);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Records the exit of `system_pid` with status `exit_status` in the shared
// process table and, if the process is known, calls onProcessExit on the
// driver instance stored in its entry (through a shared cast).
// Called from waitForProcesses.
private static void onProcessExitStatic(int system_pid, int exit_status)
|
||||
{
|
||||
auto pid = cast(ProcessID)system_pid;
|
||||
|
||||
// Stays null when the PID has no entry, which skips the notification below.
PosixEventDriverProcesses driver;
|
||||
lockedProcessInfo(pid, (ProcessInfo* info) @safe {
|
||||
// We get notified of any child exiting, so ignore the ones we're
|
||||
// not aware of
|
||||
if (info is null) return;
|
||||
|
||||
// Increment the ref count to make sure it doesn't get removed
|
||||
info.refCount++;
|
||||
|
||||
info.exited = true;
|
||||
info.exitCode = exit_status;
|
||||
driver = info.driver;
|
||||
});
|
||||
|
||||
// The notification is issued after lockedProcessInfo has returned, i.e.
// without s_mutex held.
if (driver)
|
||||
() @trusted { return cast(shared)driver; } ().onProcessExit(cast(int)pid);
|
||||
}
|
||||
|
||||
private static struct ProcessInfo {
|
||||
bool exited = true;
|
||||
int exitCode;
|
||||
ProcessWaitCallback[] callbacks;
|
||||
size_t refCount = 0;
|
||||
PosixEventDriverProcesses driver;
|
||||
|
||||
DataInitializer userDataDestructor;
|
||||
ubyte[16*size_t.sizeof] userData;
|
||||
}
|
||||
}
|
||||
|
||||
final class DummyEventDriverProcesses(Loop : PosixEventLoop) : EventDriverProcesses {
|
||||
|
|
76
tests/issue-122-coalesced-sigchld.d
Normal file
76
tests/issue-122-coalesced-sigchld.d
Normal file
|
@ -0,0 +1,76 @@
|
|||
#!/usr/bin/env dub
|
||||
/+ dub.sdl:
|
||||
name "test"
|
||||
dependency "eventcore" path=".."
|
||||
+/
|
||||
|
||||
module test;
|
||||
|
||||
import core.time : Duration, msecs;
|
||||
import eventcore.core;
|
||||
import std.conv;
|
||||
import std.datetime;
|
||||
import std.process : thisProcessID;
|
||||
import std.stdio;
|
||||
|
||||
version (Windows) {
|
||||
void main()
|
||||
{
|
||||
writefln("Skipping SIGCHLD coalesce test on Windows.");
|
||||
}
|
||||
} else:
|
||||
|
||||
import core.sys.posix.sys.wait : waitpid, WNOHANG;
|
||||
|
||||
int numProc;
|
||||
|
||||
void main(string[] args)
|
||||
{
|
||||
// child mode
|
||||
if (args.length == 2)
|
||||
{
|
||||
import core.thread : Thread;
|
||||
writefln("Child: %s (%s) from %s", args[1], (args[1].to!long - Clock.currStdTime).hnsecs, thisProcessID);
|
||||
Thread.sleep((args[1].to!long - Clock.currStdTime).hnsecs);
|
||||
return;
|
||||
}
|
||||
|
||||
auto tm = eventDriver.timers.create();
|
||||
eventDriver.timers.set(tm, 5.seconds, 0.msecs);
|
||||
eventDriver.timers.wait(tm, (tm) @trusted {
|
||||
assert(false, "Test hung.");
|
||||
});
|
||||
|
||||
// attempt to let all child processes finish in exactly 1 second to force
|
||||
// signal coalescing
|
||||
auto targettime = Clock.currTime(UTC()) + 1.seconds;
|
||||
|
||||
auto procs = new Process[](20);
|
||||
foreach (i, ref p; procs) {
|
||||
p = eventDriver.processes.spawn(
|
||||
[args[0], targettime.stdTime.to!string],
|
||||
ProcessStdinFile(ProcessRedirect.inherit),
|
||||
ProcessStdoutFile(ProcessRedirect.inherit),
|
||||
ProcessStderrFile(ProcessRedirect.inherit),
|
||||
null, ProcessConfig.none, null
|
||||
);
|
||||
assert(p != Process.init);
|
||||
|
||||
writeln("Started child: ", p.pid);
|
||||
numProc++;
|
||||
}
|
||||
|
||||
foreach (p; procs) {
|
||||
eventDriver.processes.wait(p.pid, (ProcessID pid, int res) nothrow
|
||||
{
|
||||
numProc--;
|
||||
try writefln("Child %s exited with %s", pid, res);
|
||||
catch(Exception){}
|
||||
});
|
||||
}
|
||||
|
||||
do eventDriver.core.processEvents(Duration.max);
|
||||
while (numProc);
|
||||
|
||||
foreach (p; procs) assert(waitpid(cast(int)p.pid, null, WNOHANG) == -1);
|
||||
}
|
Loading…
Reference in a new issue