Merge pull request #122 from vibe-d/fix_zombie_processes

Use waitpid to iterate over all exited child processes
This commit is contained in:
Sönke Ludwig 2019-08-24 00:38:15 +02:00 committed by GitHub
commit bca94d5736
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 429 additions and 345 deletions

View file

@ -55,7 +55,7 @@ final class PosixEventDriver(Loop : PosixEventLoop) : EventDriver {
version (linux) alias WatcherDriver = InotifyEventDriverWatchers!EventsDriver;
//else version (OSX) alias WatcherDriver = FSEventsEventDriverWatchers!EventsDriver;
else alias WatcherDriver = PollEventDriverWatchers!EventsDriver;
version (linux) alias ProcessDriver = SignalEventDriverProcesses!Loop;
version (Posix) alias ProcessDriver = PosixEventDriverProcesses!Loop;
else alias ProcessDriver = DummyEventDriverProcesses!Loop;
Loop m_loop;

View file

@ -10,96 +10,34 @@ import std.algorithm.comparison : among;
import std.variant : visit;
import std.stdint;
/// Bookkeeping record stored for every adopted process.
private struct ProcessInfo {
// true once the process has terminated; defaults to true for a blank record
bool exited = true;
// exit code recorded when the process terminated
int exitCode;
// callbacks registered through wait(); cancelled entries are set to null
ProcessWaitCallback[] callbacks;
// number of references held on this record
size_t refCount = 0;
// driver instance that adopted the process
EventDriverProcesses driver;
// destructor for userData; null as long as no user data has been initialized
DataInitializer userDataDestructor;
// inline storage for user data (16 machine words)
ubyte[16*size_t.sizeof] userData;
}
/// Holder for the global table of adopted processes and the mutex used to
/// serialize access to it.
private struct StaticProcesses {
@safe: nothrow:
import core.sync.mutex : Mutex;
private {
// locked by add() and lockedProcessInfo() while they touch m_processes
static shared Mutex m_mutex;
// information about each adopted process, keyed by process ID
static __gshared ProcessInfo[ProcessID] m_processes;
}
// module constructor: allocates the mutex
shared static this()
{
m_mutex = new shared Mutex;
}
/// Inserts `info` for `pid` while holding the mutex. Asserts that `pid`
/// has not been inserted before.
static void add(ProcessID pid, ProcessInfo info) @trusted {
m_mutex.lock_nothrow();
scope (exit) m_mutex.unlock_nothrow();
assert(pid !in m_processes, "Process adopted twice");
m_processes[pid] = info;
}
}
/// Looks up the record for `pid` and invokes `fn` on it while the global
/// process mutex is held.
///
/// `fn` receives a pointer to the record, or null if `pid` is not in the
/// table. The value returned by `fn` is passed through to the caller.
private auto lockedProcessInfo(alias fn)(ProcessID pid) @trusted {
StaticProcesses.m_mutex.lock_nothrow();
scope (exit) StaticProcesses.m_mutex.unlock_nothrow();
// `in` yields a pointer to the stored value, or null when the key is missing
auto info = pid in StaticProcesses.m_processes;
return fn(info);
}
private enum SIGCHLD = 17;
final class SignalEventDriverProcesses(Loop : PosixEventLoop) : EventDriverProcesses {
final class PosixEventDriverProcesses(Loop : PosixEventLoop) : EventDriverProcesses {
@safe: /*@nogc:*/ nothrow:
import core.stdc.errno : errno, EAGAIN, EINPROGRESS;
import core.sys.linux.sys.signalfd;
import core.sys.posix.unistd : close, read, write, dup;
import core.sync.mutex : Mutex;
import core.sys.posix.unistd : dup;
import core.thread : Thread;
private {
static shared Mutex s_mutex;
static __gshared ProcessInfo[ProcessID] s_processes;
static __gshared Thread s_waitThread;
Loop m_loop;
// FIXME: avoid virtual function calls and use the final type instead
EventDriver m_driver;
SignalListenID m_sighandle;
}
/// Creates the process driver and starts listening for SIGCHLD through a
/// signalfd that is registered with the event loop.
///
/// Params:
///   loop = event loop on which the signal file descriptor is registered
///   driver = event driver, stored in m_driver
this(Loop loop, EventDriver driver)
{
	import core.sys.posix.signal;

	m_loop = loop;
	m_driver = driver;

	// Listen for child process exits using SIGCHLD
	m_sighandle = () @trusted {
		sigset_t sset;
		sigemptyset(&sset);
		sigaddset(&sset, SIGCHLD);

		// The call must not live inside assert(): assert expressions are
		// compiled out in release builds, which would skip blocking SIGCHLD
		// altogether. Only the result check belongs in the assertion.
		immutable blockres = sigprocmask(SIG_BLOCK, &sset, null);
		assert(blockres == 0);

		return SignalListenID(signalfd(-1, &sset, SFD_NONBLOCK | SFD_CLOEXEC));
	} ();

	m_loop.initFD(cast(FD)m_sighandle, FDFlags.internal, SignalSlot(null));
	m_loop.registerFD(cast(FD)m_sighandle, EventMask.read);
	m_loop.setNotifyCallback!(EventType.read)(cast(FD)m_sighandle, &onSignal);

	// run the handler once right away instead of waiting for the first
	// read notification
	onSignal(cast(FD)m_sighandle);
}
/// Detaches the SIGCHLD signal file descriptor from the event loop and
/// closes it.
void dispose()
{
FD sighandle = cast(FD)m_sighandle;
// decrement the reference count of the loop's slot for this descriptor
m_loop.m_fds[sighandle].common.refCount--;
// remove the read callback that the constructor pointed at onSignal
m_loop.setNotifyCallback!(EventType.read)(sighandle, null);
m_loop.unregisterFD(sighandle, EventMask.read|EventMask.write|EventMask.status);
m_loop.clearFD!(SignalSlot)(sighandle);
close(cast(int)sighandle);
}
final override ProcessID adopt(int system_pid)
@ -110,8 +48,7 @@ final class SignalEventDriverProcesses(Loop : PosixEventLoop) : EventDriverProce
info.exited = false;
info.refCount = 1;
info.driver = this;
StaticProcesses.add(pid, info);
add(pid, info);
return pid;
}
@ -217,6 +154,9 @@ final class SignalEventDriverProcesses(Loop : PosixEventLoop) : EventDriverProce
@trusted {
import core.sys.posix.signal : pkill = kill;
assert(cast(int)pid > 0, "Invalid PID passed to kill.");
if (cast(int)pid > 0)
pkill(cast(int)pid, signal);
}
@ -225,18 +165,18 @@ final class SignalEventDriverProcesses(Loop : PosixEventLoop) : EventDriverProce
bool exited;
int exitCode;
size_t id = lockedProcessInfo!((info) {
size_t id = size_t.max;
lockedProcessInfo(pid, (info) {
assert(info !is null, "Unknown process ID");
if (info.exited) {
exited = true;
exitCode = info.exitCode;
return 0;
} else {
info.callbacks ~= on_process_exit;
return info.callbacks.length - 1;
id = info.callbacks.length - 1;
}
})(pid);
});
if (exited) {
on_process_exit(pid, exitCode);
@ -245,62 +185,22 @@ final class SignalEventDriverProcesses(Loop : PosixEventLoop) : EventDriverProce
return id;
}
final override void cancelWait(ProcessID pid, size_t waitId)
final override void cancelWait(ProcessID pid, size_t wait_id)
{
lockedProcessInfo!((info) {
if (wait_id == size_t.max) return;
lockedProcessInfo(pid, (info) {
assert(info !is null, "Unknown process ID");
assert(!info.exited, "Cannot cancel wait when none are pending");
assert(info.callbacks.length > waitId, "Invalid process wait ID");
assert(info.callbacks.length > wait_id, "Invalid process wait ID");
info.callbacks[waitId] = null;
})(pid);
info.callbacks[wait_id] = null;
});
}
/// Read callback for the signal file descriptor: reads signal records from
/// `fd` and reports each one through onProcessExit.
private void onSignal(FD fd)
{
// NOTE(review): `lid` is not used anywhere in this function
SignalListenID lid = cast(SignalListenID)fd;
signalfd_siginfo nfo;
do {
auto ret = () @trusted { return read(cast(int)fd, &nfo, nfo.sizeof); } ();
// `&&` binds tighter than `||`: the loop is left on EAGAIN/EINPROGRESS
// as well as on any result that is not exactly one full record
if (ret == -1 && errno.among!(EAGAIN, EINPROGRESS) || ret != nfo.sizeof)
return;
// forward the PID and status stored in the signal record
onProcessExit(nfo.ssi_pid, nfo.ssi_status);
} while (true);
}
private void onProcessExit(int system_pid, int exitCode)
{
auto pid = cast(ProcessID)system_pid;
ProcessWaitCallback[] callbacks;
auto driver = lockedProcessInfo!((info) @safe {
// We get notified of any child exiting, so ignore the ones we're
// not aware of
if (info is null) {
return null;
}
// Increment the ref count to make sure it doesn't get removed
info.refCount++;
info.exited = true;
info.exitCode = exitCode;
return info.driver;
})(pid);
// Need to call callbacks in the owner thread as this function can be
// called from any thread. Without extra threads this is always the main
// thread.
if (() @trusted { return cast(void*)this == cast(void*)driver; } ()) {
onLocalProcessExit(cast(intptr_t)pid);
} else if (driver) {
auto sharedDriver = () @trusted { return cast(shared typeof(this))driver; } ();
sharedDriver.m_driver.core.runInOwnerThread(&onLocalProcessExit, cast(intptr_t)pid);
}
private void onProcessExit(int system_pid)
shared {
m_driver.core.runInOwnerThread(&onLocalProcessExit, system_pid);
}
private static void onLocalProcessExit(intptr_t system_pid)
@ -310,7 +210,8 @@ final class SignalEventDriverProcesses(Loop : PosixEventLoop) : EventDriverProce
int exitCode;
ProcessWaitCallback[] callbacks;
auto driver = lockedProcessInfo!((info) {
PosixEventDriverProcesses driver;
lockedProcessInfo(pid, (info) {
assert(info !is null);
exitCode = info.exitCode;
@ -318,8 +219,8 @@ final class SignalEventDriverProcesses(Loop : PosixEventLoop) : EventDriverProce
callbacks = info.callbacks;
info.callbacks = null;
return info.driver;
})(pid);
driver = info.driver;
});
foreach (cb; callbacks) {
if (cb)
@ -331,53 +232,160 @@ final class SignalEventDriverProcesses(Loop : PosixEventLoop) : EventDriverProce
final override bool hasExited(ProcessID pid)
{
return lockedProcessInfo!((info) {
bool ret;
lockedProcessInfo(pid, (info) {
assert(info !is null, "Unknown process ID");
return info.exited;
})(pid);
ret = info.exited;
});
return ret;
}
final override void addRef(ProcessID pid)
{
lockedProcessInfo!((info) {
lockedProcessInfo(pid, (info) {
nogc_assert(info.refCount > 0, "Adding reference to unreferenced process FD.");
info.refCount++;
})(pid);
});
}
final override bool releaseRef(ProcessID pid)
{
return lockedProcessInfo!((info) {
bool ret;
lockedProcessInfo(pid, (info) {
nogc_assert(info.refCount > 0, "Releasing reference to unreferenced process FD.");
if (--info.refCount == 0) {
// Remove/deallocate process
if (info.userDataDestructor)
() @trusted { info.userDataDestructor(info.userData.ptr); } ();
StaticProcesses.m_processes.remove(pid);
return false;
}
return true;
})(pid);
() @trusted { s_processes.remove(pid); } ();
ret = false;
} else ret = true;
});
return ret;
}
final protected override void* rawUserData(ProcessID pid, size_t size, DataInitializer initialize, DataInitializer destroy)
@system {
return lockedProcessInfo!((info) {
void* ret;
lockedProcessInfo(pid, (info) @safe nothrow {
assert(info.userDataDestructor is null || info.userDataDestructor is destroy,
"Requesting user data with differing type (destructor).");
assert(size <= ProcessInfo.userData.length, "Requested user data is too large.");
if (!info.userDataDestructor) {
initialize(info.userData.ptr);
() @trusted { initialize(info.userData.ptr); } ();
info.userDataDestructor = destroy;
}
return info.userData.ptr;
})(pid);
ret = () @trusted { return info.userData.ptr; } ();
});
return ret;
}
package final @property size_t pendingCount() const nothrow @trusted { return StaticProcesses.m_processes.length; }
package final @property size_t pendingCount() const nothrow @trusted { return s_processes.length; }
// Module constructor: allocates the mutex that add(), lockedProcessInfo()
// and waitForProcesses() lock while accessing the shared static state.
shared static this()
{
s_mutex = new shared Mutex;
}
/// Runs `fn` on the table entry of `pid` while `s_mutex` is held.
///
/// Params:
///   pid = process to look up in `s_processes`
///   fn = delegate receiving a pointer to the entry, or null when `pid`
///        is not present in the table
private static void lockedProcessInfo(ProcessID pid, scope void delegate(ProcessInfo*) nothrow @safe fn)
{
	s_mutex.lock_nothrow();
	scope (exit) s_mutex.unlock_nothrow();

	// the lookup touches __gshared data and therefore needs to be @trusted
	fn(() @trusted { return pid in s_processes; } ());
}
/// Stores `info` for `pid` in the global process table and makes sure that
/// a waiter thread is running.
///
/// Asserts that `pid` has not been added before.
private static void add(ProcessID pid, ProcessInfo info) @trusted {
	s_mutex.lock_nothrow();
	scope (exit) s_mutex.unlock_nothrow();

	// start the waiter thread if none is currently registered
	if (s_waitThread is null) {
		auto waiter = new Thread(&waitForProcesses);
		s_waitThread = waiter;
		waiter.start();
	}

	assert(pid !in s_processes, "Process adopted twice");
	s_processes[pid] = info;
}
/// Body of the waiter thread: blocks until some child process has exited,
/// then collects the exit status of every tracked process that is done.
///
/// The thread unregisters itself (`s_waitThread = null`) and terminates when
/// `waitid` fails for any reason other than being interrupted by a signal.
/// `add()` starts a fresh thread the next time a process is registered.
private static void waitForProcesses()
@system {
	import core.stdc.errno : errno, EINTR;
	import core.sys.posix.sys.wait : idtype_t, WNOHANG, WNOWAIT, WEXITED, WEXITSTATUS, WIFEXITED, WTERMSIG, waitid, waitpid;
	import core.sys.posix.signal : siginfo_t;

	while (true) {
		siginfo_t dummy;
		// WNOWAIT keeps the exited child waitable, so that the per-PID
		// waitpid() call below can still retrieve its status
		auto ret = waitid(idtype_t.P_ALL, -1, &dummy, WEXITED|WNOWAIT);
		if (ret == -1) {
			// An interrupting signal does not mean that there are no
			// children left - retry instead of shutting the thread down,
			// which would leave exited children unnoticed.
			if (errno == EINTR) continue;

			{
				s_mutex.lock_nothrow();
				scope (exit) s_mutex.unlock_nothrow();
				s_waitThread = null;
			}
			break;
		}

		// snapshot the PIDs of all tracked processes that have not been
		// marked as exited yet, so that the lock is not held during waitpid()
		ProcessID[] allprocs;

		{
			s_mutex.lock_nothrow();
			scope (exit) s_mutex.unlock_nothrow();

			() @trusted {
				foreach (ref entry; s_processes.byKeyValue) {
					if (!entry.value.exited)
						allprocs ~= entry.key;
				}
			} ();
		}

		foreach (pid; allprocs) {
			int status;
			// WNOHANG: only collect processes that have already terminated
			ret = () @trusted { return waitpid(cast(int)pid, &status, WNOHANG); } ();
			if (ret == cast(int)pid) {
				// regular exit status, or the negated signal number if the
				// process did not exit normally
				int exitstatus = WIFEXITED(status) ? WEXITSTATUS(status) : -WTERMSIG(status);
				onProcessExitStatic(ret, exitstatus);
			}
		}
	}
}
/// Marks a tracked process as exited, stores its exit status and notifies
/// the driver that owns it.
///
/// Params:
///   system_pid = PID of the process that has exited
///   exit_status = value to store as the exit code of the process
private static void onProcessExitStatic(int system_pid, int exit_status)
{
	auto pid = cast(ProcessID)system_pid;
	PosixEventDriverProcesses owner;

	lockedProcessInfo(pid, (ProcessInfo* entry) @safe {
		// Exit notifications arrive for any child process; only the ones
		// present in the table are handled.
		if (entry !is null) {
			// Take an extra reference so that the entry doesn't get removed
			entry.refCount++;
			entry.exited = true;
			entry.exitCode = exit_status;
			owner = entry.driver;
		}
	});

	if (owner is null) return;

	auto shared_owner = () @trusted { return cast(shared)owner; } ();
	shared_owner.onProcessExit(cast(int)pid);
}
/// Bookkeeping record stored in `s_processes` for every adopted process.
private static struct ProcessInfo {
// true once the process has terminated; defaults to true for a blank record
bool exited = true;
// exit status stored by onProcessExitStatic()
int exitCode;
// callbacks registered through wait(); cancelled entries are set to null
ProcessWaitCallback[] callbacks;
// number of references held on this record; releaseRef() removes the
// record from the table once this drops to zero
size_t refCount = 0;
// driver instance that adopted the process
PosixEventDriverProcesses driver;
// destructor for userData; null as long as no user data has been initialized
DataInitializer userDataDestructor;
// inline storage for user data (16 machine words)
ubyte[16*size_t.sizeof] userData;
}
}
final class DummyEventDriverProcesses(Loop : PosixEventLoop) : EventDriverProcesses {

View file

@ -0,0 +1,76 @@
#!/usr/bin/env dub
/+ dub.sdl:
name "test"
dependency "eventcore" path=".."
+/
module test;
import core.time : Duration, msecs;
import eventcore.core;
import std.conv;
import std.datetime;
import std.process : thisProcessID;
import std.stdio;
version (Windows) {
	/// Stub entry point: on Windows the test only prints a notice and exits.
	void main()
	{
		writeln("Skipping SIGCHLD coalesce test on Windows.");
	}
} else:
import core.sys.posix.sys.wait : waitpid, WNOHANG;
int numProc;
/// Test entry point.
///
/// Without extra arguments, spawns 20 copies of this executable that all
/// terminate at the same point in time, waits for every exit callback and
/// finally verifies that no child is left to be waited for. With a single
/// argument (a target time in hnsecs, as produced by `stdTime`), acts as
/// such a child and sleeps until that time.
void main(string[] args)
{
	// child mode
	if (args.length == 2)
	{
		import core.thread : Thread;

		// Read the clock only once so that the logged duration is the one
		// actually slept, and clamp it to zero: if the target time has
		// already passed the difference is negative, which Thread.sleep
		// does not accept.
		auto remaining = (args[1].to!long - Clock.currStdTime).hnsecs;
		if (remaining < Duration.zero) remaining = Duration.zero;

		writefln("Child: %s (%s) from %s", args[1], remaining, thisProcessID);
		Thread.sleep(remaining);
		return;
	}

	// watchdog: abort the test if it has not finished after 5 seconds
	auto tm = eventDriver.timers.create();
	eventDriver.timers.set(tm, 5.seconds, 0.msecs);
	eventDriver.timers.wait(tm, (tm) @trusted {
		assert(false, "Test hung.");
	});

	// attempt to let all child processes finish in exactly 1 second to force
	// signal coalescing
	auto targettime = Clock.currTime(UTC()) + 1.seconds;

	auto procs = new Process[](20);
	foreach (i, ref p; procs) {
		p = eventDriver.processes.spawn(
			[args[0], targettime.stdTime.to!string],
			ProcessStdinFile(ProcessRedirect.inherit),
			ProcessStdoutFile(ProcessRedirect.inherit),
			ProcessStderrFile(ProcessRedirect.inherit),
			null, ProcessConfig.none, null
		);
		assert(p != Process.init);

		writeln("Started child: ", p.pid);
		numProc++;
	}

	foreach (p; procs) {
		eventDriver.processes.wait(p.pid, (ProcessID pid, int res) nothrow
		{
			numProc--;
			try writefln("Child %s exited with %s", pid, res);
			catch(Exception){}
		});
	}

	// run the event loop until every exit callback has fired
	do eventDriver.core.processEvents(Duration.max);
	while (numProc);

	// every child must have been reaped by the driver, so waiting for any
	// of them again has to fail
	foreach (p; procs) assert(waitpid(cast(int)p.pid, null, WNOHANG) == -1);
}