Merge pull request #122 from vibe-d/fix_zombie_processes

Use waitpid to iterate over all exited child processes
This commit is contained in:
Sönke Ludwig 2019-08-24 00:38:15 +02:00 committed by GitHub
commit bca94d5736
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 429 additions and 345 deletions

View file

@ -55,7 +55,7 @@ final class PosixEventDriver(Loop : PosixEventLoop) : EventDriver {
version (linux) alias WatcherDriver = InotifyEventDriverWatchers!EventsDriver; version (linux) alias WatcherDriver = InotifyEventDriverWatchers!EventsDriver;
//else version (OSX) alias WatcherDriver = FSEventsEventDriverWatchers!EventsDriver; //else version (OSX) alias WatcherDriver = FSEventsEventDriverWatchers!EventsDriver;
else alias WatcherDriver = PollEventDriverWatchers!EventsDriver; else alias WatcherDriver = PollEventDriverWatchers!EventsDriver;
version (linux) alias ProcessDriver = SignalEventDriverProcesses!Loop; version (Posix) alias ProcessDriver = PosixEventDriverProcesses!Loop;
else alias ProcessDriver = DummyEventDriverProcesses!Loop; else alias ProcessDriver = DummyEventDriverProcesses!Loop;
Loop m_loop; Loop m_loop;

View file

@ -10,96 +10,34 @@ import std.algorithm.comparison : among;
import std.variant : visit; import std.variant : visit;
import std.stdint; import std.stdint;
private struct ProcessInfo {
bool exited = true;
int exitCode;
ProcessWaitCallback[] callbacks;
size_t refCount = 0;
EventDriverProcesses driver;
DataInitializer userDataDestructor;
ubyte[16*size_t.sizeof] userData;
}
private struct StaticProcesses {
@safe: nothrow:
import core.sync.mutex : Mutex;
private {
static shared Mutex m_mutex;
static __gshared ProcessInfo[ProcessID] m_processes;
}
shared static this()
{
m_mutex = new shared Mutex;
}
static void add(ProcessID pid, ProcessInfo info) @trusted {
m_mutex.lock_nothrow();
scope (exit) m_mutex.unlock_nothrow();
assert(pid !in m_processes, "Process adopted twice");
m_processes[pid] = info;
}
}
private auto lockedProcessInfo(alias fn)(ProcessID pid) @trusted {
StaticProcesses.m_mutex.lock_nothrow();
scope (exit) StaticProcesses.m_mutex.unlock_nothrow();
auto info = pid in StaticProcesses.m_processes;
return fn(info);
}
private enum SIGCHLD = 17; private enum SIGCHLD = 17;
final class SignalEventDriverProcesses(Loop : PosixEventLoop) : EventDriverProcesses { final class PosixEventDriverProcesses(Loop : PosixEventLoop) : EventDriverProcesses {
@safe: /*@nogc:*/ nothrow: @safe: /*@nogc:*/ nothrow:
import core.stdc.errno : errno, EAGAIN, EINPROGRESS; import core.sync.mutex : Mutex;
import core.sys.linux.sys.signalfd; import core.sys.posix.unistd : dup;
import core.sys.posix.unistd : close, read, write, dup; import core.thread : Thread;
private { private {
static shared Mutex s_mutex;
static __gshared ProcessInfo[ProcessID] s_processes;
static __gshared Thread s_waitThread;
Loop m_loop; Loop m_loop;
// FIXME: avoid virtual function calls and use the final type instead
EventDriver m_driver; EventDriver m_driver;
SignalListenID m_sighandle;
} }
this(Loop loop, EventDriver driver) this(Loop loop, EventDriver driver)
{ {
import core.sys.posix.signal;
m_loop = loop; m_loop = loop;
m_driver = driver; m_driver = driver;
// Listen for child process exits using SIGCHLD
m_sighandle = () @trusted {
sigset_t sset;
sigemptyset(&sset);
sigaddset(&sset, SIGCHLD);
assert(sigprocmask(SIG_BLOCK, &sset, null) == 0);
return SignalListenID(signalfd(-1, &sset, SFD_NONBLOCK | SFD_CLOEXEC));
} ();
m_loop.initFD(cast(FD)m_sighandle, FDFlags.internal, SignalSlot(null));
m_loop.registerFD(cast(FD)m_sighandle, EventMask.read);
m_loop.setNotifyCallback!(EventType.read)(cast(FD)m_sighandle, &onSignal);
onSignal(cast(FD)m_sighandle);
} }
void dispose() void dispose()
{ {
FD sighandle = cast(FD)m_sighandle;
m_loop.m_fds[sighandle].common.refCount--;
m_loop.setNotifyCallback!(EventType.read)(sighandle, null);
m_loop.unregisterFD(sighandle, EventMask.read|EventMask.write|EventMask.status);
m_loop.clearFD!(SignalSlot)(sighandle);
close(cast(int)sighandle);
} }
final override ProcessID adopt(int system_pid) final override ProcessID adopt(int system_pid)
@ -110,8 +48,7 @@ final class SignalEventDriverProcesses(Loop : PosixEventLoop) : EventDriverProce
info.exited = false; info.exited = false;
info.refCount = 1; info.refCount = 1;
info.driver = this; info.driver = this;
StaticProcesses.add(pid, info); add(pid, info);
return pid; return pid;
} }
@ -217,6 +154,9 @@ final class SignalEventDriverProcesses(Loop : PosixEventLoop) : EventDriverProce
@trusted { @trusted {
import core.sys.posix.signal : pkill = kill; import core.sys.posix.signal : pkill = kill;
assert(cast(int)pid > 0, "Invalid PID passed to kill.");
if (cast(int)pid > 0)
pkill(cast(int)pid, signal); pkill(cast(int)pid, signal);
} }
@ -225,18 +165,18 @@ final class SignalEventDriverProcesses(Loop : PosixEventLoop) : EventDriverProce
bool exited; bool exited;
int exitCode; int exitCode;
size_t id = lockedProcessInfo!((info) { size_t id = size_t.max;
lockedProcessInfo(pid, (info) {
assert(info !is null, "Unknown process ID"); assert(info !is null, "Unknown process ID");
if (info.exited) { if (info.exited) {
exited = true; exited = true;
exitCode = info.exitCode; exitCode = info.exitCode;
return 0;
} else { } else {
info.callbacks ~= on_process_exit; info.callbacks ~= on_process_exit;
return info.callbacks.length - 1; id = info.callbacks.length - 1;
} }
})(pid); });
if (exited) { if (exited) {
on_process_exit(pid, exitCode); on_process_exit(pid, exitCode);
@ -245,62 +185,22 @@ final class SignalEventDriverProcesses(Loop : PosixEventLoop) : EventDriverProce
return id; return id;
} }
final override void cancelWait(ProcessID pid, size_t waitId) final override void cancelWait(ProcessID pid, size_t wait_id)
{ {
lockedProcessInfo!((info) { if (wait_id == size_t.max) return;
lockedProcessInfo(pid, (info) {
assert(info !is null, "Unknown process ID"); assert(info !is null, "Unknown process ID");
assert(!info.exited, "Cannot cancel wait when none are pending"); assert(!info.exited, "Cannot cancel wait when none are pending");
assert(info.callbacks.length > waitId, "Invalid process wait ID"); assert(info.callbacks.length > wait_id, "Invalid process wait ID");
info.callbacks[waitId] = null; info.callbacks[wait_id] = null;
})(pid); });
} }
private void onSignal(FD fd) private void onProcessExit(int system_pid)
{ shared {
SignalListenID lid = cast(SignalListenID)fd; m_driver.core.runInOwnerThread(&onLocalProcessExit, system_pid);
signalfd_siginfo nfo;
do {
auto ret = () @trusted { return read(cast(int)fd, &nfo, nfo.sizeof); } ();
if (ret == -1 && errno.among!(EAGAIN, EINPROGRESS) || ret != nfo.sizeof)
return;
onProcessExit(nfo.ssi_pid, nfo.ssi_status);
} while (true);
}
private void onProcessExit(int system_pid, int exitCode)
{
auto pid = cast(ProcessID)system_pid;
ProcessWaitCallback[] callbacks;
auto driver = lockedProcessInfo!((info) @safe {
// We get notified of any child exiting, so ignore the ones we're
// not aware of
if (info is null) {
return null;
}
// Increment the ref count to make sure it doesn't get removed
info.refCount++;
info.exited = true;
info.exitCode = exitCode;
return info.driver;
})(pid);
// Need to call callbacks in the owner thread as this function can be
// called from any thread. Without extra threads this is always the main
// thread.
if (() @trusted { return cast(void*)this == cast(void*)driver; } ()) {
onLocalProcessExit(cast(intptr_t)pid);
} else if (driver) {
auto sharedDriver = () @trusted { return cast(shared typeof(this))driver; } ();
sharedDriver.m_driver.core.runInOwnerThread(&onLocalProcessExit, cast(intptr_t)pid);
}
} }
private static void onLocalProcessExit(intptr_t system_pid) private static void onLocalProcessExit(intptr_t system_pid)
@ -310,7 +210,8 @@ final class SignalEventDriverProcesses(Loop : PosixEventLoop) : EventDriverProce
int exitCode; int exitCode;
ProcessWaitCallback[] callbacks; ProcessWaitCallback[] callbacks;
auto driver = lockedProcessInfo!((info) { PosixEventDriverProcesses driver;
lockedProcessInfo(pid, (info) {
assert(info !is null); assert(info !is null);
exitCode = info.exitCode; exitCode = info.exitCode;
@ -318,8 +219,8 @@ final class SignalEventDriverProcesses(Loop : PosixEventLoop) : EventDriverProce
callbacks = info.callbacks; callbacks = info.callbacks;
info.callbacks = null; info.callbacks = null;
return info.driver; driver = info.driver;
})(pid); });
foreach (cb; callbacks) { foreach (cb; callbacks) {
if (cb) if (cb)
@ -331,53 +232,160 @@ final class SignalEventDriverProcesses(Loop : PosixEventLoop) : EventDriverProce
final override bool hasExited(ProcessID pid) final override bool hasExited(ProcessID pid)
{ {
return lockedProcessInfo!((info) { bool ret;
lockedProcessInfo(pid, (info) {
assert(info !is null, "Unknown process ID"); assert(info !is null, "Unknown process ID");
ret = info.exited;
return info.exited; });
})(pid); return ret;
} }
final override void addRef(ProcessID pid) final override void addRef(ProcessID pid)
{ {
lockedProcessInfo!((info) { lockedProcessInfo(pid, (info) {
nogc_assert(info.refCount > 0, "Adding reference to unreferenced process FD."); nogc_assert(info.refCount > 0, "Adding reference to unreferenced process FD.");
info.refCount++; info.refCount++;
})(pid); });
} }
final override bool releaseRef(ProcessID pid) final override bool releaseRef(ProcessID pid)
{ {
return lockedProcessInfo!((info) { bool ret;
lockedProcessInfo(pid, (info) {
nogc_assert(info.refCount > 0, "Releasing reference to unreferenced process FD."); nogc_assert(info.refCount > 0, "Releasing reference to unreferenced process FD.");
if (--info.refCount == 0) { if (--info.refCount == 0) {
// Remove/deallocate process // Remove/deallocate process
if (info.userDataDestructor) if (info.userDataDestructor)
() @trusted { info.userDataDestructor(info.userData.ptr); } (); () @trusted { info.userDataDestructor(info.userData.ptr); } ();
StaticProcesses.m_processes.remove(pid); () @trusted { s_processes.remove(pid); } ();
return false; ret = false;
} } else ret = true;
return true; });
})(pid); return ret;
} }
final protected override void* rawUserData(ProcessID pid, size_t size, DataInitializer initialize, DataInitializer destroy) final protected override void* rawUserData(ProcessID pid, size_t size, DataInitializer initialize, DataInitializer destroy)
@system { @system {
return lockedProcessInfo!((info) { void* ret;
lockedProcessInfo(pid, (info) @safe nothrow {
assert(info.userDataDestructor is null || info.userDataDestructor is destroy, assert(info.userDataDestructor is null || info.userDataDestructor is destroy,
"Requesting user data with differing type (destructor)."); "Requesting user data with differing type (destructor).");
assert(size <= ProcessInfo.userData.length, "Requested user data is too large."); assert(size <= ProcessInfo.userData.length, "Requested user data is too large.");
if (!info.userDataDestructor) { if (!info.userDataDestructor) {
initialize(info.userData.ptr); () @trusted { initialize(info.userData.ptr); } ();
info.userDataDestructor = destroy; info.userDataDestructor = destroy;
} }
return info.userData.ptr; ret = () @trusted { return info.userData.ptr; } ();
})(pid); });
return ret;
} }
package final @property size_t pendingCount() const nothrow @trusted { return StaticProcesses.m_processes.length; } package final @property size_t pendingCount() const nothrow @trusted { return s_processes.length; }
shared static this()
{
s_mutex = new shared Mutex;
}
private static void lockedProcessInfo(ProcessID pid, scope void delegate(ProcessInfo*) nothrow @safe fn)
{
s_mutex.lock_nothrow();
scope (exit) s_mutex.unlock_nothrow();
auto info = () @trusted { return pid in s_processes; } ();
fn(info);
}
private static void add(ProcessID pid, ProcessInfo info) @trusted {
s_mutex.lock_nothrow();
scope (exit) s_mutex.unlock_nothrow();
if (!s_waitThread) {
s_waitThread = new Thread(&waitForProcesses);
s_waitThread.start();
}
assert(pid !in s_processes, "Process adopted twice");
s_processes[pid] = info;
}
private static void waitForProcesses()
@system {
import core.sys.posix.sys.wait : idtype_t, WNOHANG, WNOWAIT, WEXITED, WEXITSTATUS, WIFEXITED, WTERMSIG, waitid, waitpid;
import core.sys.posix.signal : siginfo_t;
while (true) {
siginfo_t dummy;
auto ret = waitid(idtype_t.P_ALL, -1, &dummy, WEXITED|WNOWAIT);
if (ret == -1) {
{
s_mutex.lock_nothrow();
scope (exit) s_mutex.unlock_nothrow();
s_waitThread = null;
}
break;
}
ProcessID[] allprocs;
{
s_mutex.lock_nothrow();
scope (exit) s_mutex.unlock_nothrow();
() @trusted {
foreach (ref entry; s_processes.byKeyValue) {
if (!entry.value.exited)
allprocs ~= entry.key;
}
} ();
}
foreach (pid; allprocs) {
int status;
ret = () @trusted { return waitpid(cast(int)pid, &status, WNOHANG); } ();
if (ret == cast(int)pid) {
int exitstatus = WIFEXITED(status) ? WEXITSTATUS(status) : -WTERMSIG(status);
onProcessExitStatic(ret, exitstatus);
}
}
}
}
private static void onProcessExitStatic(int system_pid, int exit_status)
{
auto pid = cast(ProcessID)system_pid;
PosixEventDriverProcesses driver;
lockedProcessInfo(pid, (ProcessInfo* info) @safe {
// We get notified of any child exiting, so ignore the ones we're
// not aware of
if (info is null) return;
// Increment the ref count to make sure it doesn't get removed
info.refCount++;
info.exited = true;
info.exitCode = exit_status;
driver = info.driver;
});
if (driver)
() @trusted { return cast(shared)driver; } ().onProcessExit(cast(int)pid);
}
private static struct ProcessInfo {
bool exited = true;
int exitCode;
ProcessWaitCallback[] callbacks;
size_t refCount = 0;
PosixEventDriverProcesses driver;
DataInitializer userDataDestructor;
ubyte[16*size_t.sizeof] userData;
}
} }
final class DummyEventDriverProcesses(Loop : PosixEventLoop) : EventDriverProcesses { final class DummyEventDriverProcesses(Loop : PosixEventLoop) : EventDriverProcesses {

View file

@ -0,0 +1,76 @@
#!/usr/bin/env dub
/+ dub.sdl:
name "test"
dependency "eventcore" path=".."
+/
module test;
import core.time : Duration, msecs;
import eventcore.core;
import std.conv;
import std.datetime;
import std.process : thisProcessID;
import std.stdio;
version (Windows) {
	/// The remainder of this module is compiled for non-Windows targets only;
	/// on Windows the test merely reports that it is being skipped.
	void main()
	{
		writeln("Skipping SIGCHLD coalesce test on Windows.");
	}
} else:
import core.sys.posix.sys.wait : waitpid, WNOHANG;
/// Count of spawned child processes whose exit callback has not run yet:
/// incremented after each spawn in `main`, decremented in the wait callback,
/// and used as the termination condition of the event loop.
int numProc;
/**
 * Test entry point; runs either as the parent test driver or as a child.
 *
 * Child mode (exactly one extra argument): the argument is an absolute
 * std time in hnsecs. The child prints a line, sleeps until that time and
 * returns.
 *
 * Parent mode (any other argument count): spawns 20 copies of this
 * executable that all aim to exit at the same instant, registers an exit
 * callback for each, and processes events until every callback has fired.
 * A 5 second timer aborts the test if it does not finish in time.
 *
 * Params:
 *   args = command line; `args[0]` is used as the executable to spawn and
 *          `args[1]`, if it is the only extra argument, is the target time
 */
void main(string[] args)
{
	// child mode
	if (args.length == 2)
	{
		import core.thread : Thread;

		// Compute the remaining delay once so that the printed value and the
		// value actually slept for are the same.
		auto delay = (args[1].to!long - Clock.currStdTime).hnsecs;
		writefln("Child: %s (%s) from %s", args[1], delay, thisProcessID);

		// Thread.sleep requires a non-negative duration. A child that starts
		// after the target time has already passed exits immediately.
		if (delay > Duration.zero)
			Thread.sleep(delay);
		return;
	}

	// watchdog: fail the test if it has not completed after 5 seconds
	auto tm = eventDriver.timers.create();
	eventDriver.timers.set(tm, 5.seconds, 0.msecs);
	eventDriver.timers.wait(tm, (tm) @trusted {
		assert(false, "Test hung.");
	});

	// attempt to let all child processes finish in exactly 1 second to force
	// signal coalescing
	auto targettime = Clock.currTime(UTC()) + 1.seconds;

	auto procs = new Process[](20);
	foreach (i, ref p; procs) {
		p = eventDriver.processes.spawn(
			[args[0], targettime.stdTime.to!string],
			ProcessStdinFile(ProcessRedirect.inherit),
			ProcessStdoutFile(ProcessRedirect.inherit),
			ProcessStderrFile(ProcessRedirect.inherit),
			null, ProcessConfig.none, null
		);
		assert(p != Process.init);
		writeln("Started child: ", p.pid);
		numProc++;
	}

	foreach (p; procs) {
		eventDriver.processes.wait(p.pid, (ProcessID pid, int res) nothrow
		{
			numProc--;
			// the callback is nothrow, so output failures are deliberately ignored
			try writefln("Child %s exited with %s", pid, res);
			catch(Exception){}
		});
	}

	// run the event loop until every exit callback has been invoked
	do eventDriver.core.processEvents(Duration.max);
	while (numProc);

	// waitpid must fail for every child - presumably because the driver has
	// already reaped it, i.e. no zombie process is left behind
	foreach (p; procs) assert(waitpid(cast(int)p.pid, null, WNOHANG) == -1);
}