annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/include/kj/async-unix.h @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
rev   line source
jpayne@69 1 // Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors
jpayne@69 2 // Licensed under the MIT License:
jpayne@69 3 //
jpayne@69 4 // Permission is hereby granted, free of charge, to any person obtaining a copy
jpayne@69 5 // of this software and associated documentation files (the "Software"), to deal
jpayne@69 6 // in the Software without restriction, including without limitation the rights
jpayne@69 7 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
jpayne@69 8 // copies of the Software, and to permit persons to whom the Software is
jpayne@69 9 // furnished to do so, subject to the following conditions:
jpayne@69 10 //
jpayne@69 11 // The above copyright notice and this permission notice shall be included in
jpayne@69 12 // all copies or substantial portions of the Software.
jpayne@69 13 //
jpayne@69 14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
jpayne@69 15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
jpayne@69 16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
jpayne@69 17 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
jpayne@69 18 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
jpayne@69 19 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
jpayne@69 20 // THE SOFTWARE.
jpayne@69 21
jpayne@69 22 #pragma once
jpayne@69 23
jpayne@69 24 #if _WIN32
jpayne@69 25 #error "This file is Unix-specific. On Windows, include async-win32.h instead."
jpayne@69 26 #endif
jpayne@69 27
jpayne@69 28 #include "async.h"
jpayne@69 29 #include "timer.h"
jpayne@69 30 #include <kj/vector.h>
jpayne@69 31 #include <kj/io.h>
jpayne@69 32 #include <signal.h>
jpayne@69 33
jpayne@69 34 KJ_BEGIN_HEADER
jpayne@69 35
jpayne@69 36 #if !defined(KJ_USE_EPOLL) && !defined(KJ_USE_KQUEUE)
jpayne@69 37 #if __linux__
jpayne@69 38 // Default to epoll on Linux.
jpayne@69 39 #define KJ_USE_EPOLL 1
jpayne@69 40 #elif __APPLE__ || __FreeBSD__ || __OpenBSD__ || __NetBSD__ || __DragonFly__
jpayne@69 41 // MacOS and BSDs prefer kqueue() for event notification.
jpayne@69 42 #define KJ_USE_KQUEUE 1
jpayne@69 43 #endif
jpayne@69 44 #endif
jpayne@69 45
jpayne@69 46 #if KJ_USE_EPOLL && KJ_USE_KQUEUE
jpayne@69 47 #error "Both KJ_USE_EPOLL and KJ_USE_KQUEUE are set. Please choose only one of these."
jpayne@69 48 #endif
jpayne@69 49
jpayne@69 50 #if __CYGWIN__ && !defined(KJ_USE_PIPE_FOR_WAKEUP)
jpayne@69 51 // Cygwin has serious issues with the intersection of signals and threads, reported here:
jpayne@69 52 // https://cygwin.com/ml/cygwin/2019-07/msg00052.html
jpayne@69 53 // On Cygwin, therefore, we do not use signals to wake threads. Instead, each thread allocates a
jpayne@69 54 // pipe, and we write a byte to the pipe to wake the thread... ick.
jpayne@69 55 #define KJ_USE_PIPE_FOR_WAKEUP 1
jpayne@69 56 #endif
jpayne@69 57
jpayne@69 58 #if KJ_USE_EPOLL
jpayne@69 59 struct epoll_event;
jpayne@69 60 #elif KJ_USE_KQUEUE
jpayne@69 61 struct kevent;
jpayne@69 62 struct timespec;
jpayne@69 63 #endif
jpayne@69 64
jpayne@69 65 namespace kj {
jpayne@69 66
jpayne@69 67 class UnixEventPort: public EventPort {
jpayne@69 68 // An EventPort implementation which can wait for events on file descriptors as well as signals.
jpayne@69 69 // This API only makes sense on Unix.
jpayne@69 70 //
jpayne@69 71 // The implementation uses `poll()` or possibly a platform-specific API (e.g. epoll, kqueue).
jpayne@69 72 // To also wait on signals without race conditions, the implementation may block signals until
jpayne@69 73 // just before `poll()` while using a signal handler which `siglongjmp()`s back to just before
jpayne@69 74 // the signal was unblocked, or it may use a nicer platform-specific API.
jpayne@69 75 //
jpayne@69 76 // The implementation reserves a signal for internal use. By default, it uses SIGUSR1. If you
jpayne@69 77 // need to use SIGUSR1 for something else, you must offer a different signal by calling
jpayne@69 78 // setReservedSignal() at startup. (On Linux, no signal is reserved; eventfd is used instead.)
jpayne@69 79 //
jpayne@69 80 // WARNING: A UnixEventPort can only be used in the thread and process that created it. In
jpayne@69 81 // particular, note that after a fork(), a UnixEventPort created in the parent process will
jpayne@69 82 // not work correctly in the child, even if the parent ceases to use its copy. In particular
jpayne@69 83 // note that this means that server processes which daemonize themselves at startup must wait
jpayne@69 84 // until after daemonization to create a UnixEventPort.
jpayne@69 85 //
jpayne@69 86 // TODO(cleanup): The above warning is no longer accurate -- daemonizing after creating a
jpayne@69 87 // UnixEventPort should now work since we no longer use signalfd. But do we want to commit to
jpayne@69 88 // keeping it that way? Note it's still unsafe to fork() and then use UnixEventPort from both
jpayne@69 89 // processes!
jpayne@69 90
jpayne@69 91 public:
jpayne@69 92 UnixEventPort();
jpayne@69 93
jpayne@69 94 ~UnixEventPort() noexcept(false);
jpayne@69 95
jpayne@69 96 class FdObserver;
jpayne@69 97 // Class that watches an fd for readability or writability. See definition below.
jpayne@69 98
jpayne@69 99 Promise<siginfo_t> onSignal(int signum);
jpayne@69 100 // When the given signal is delivered to this thread, return the corresponding siginfo_t.
jpayne@69 101 // The signal must have been captured using `captureSignal()`.
jpayne@69 102 //
jpayne@69 103 // If `onSignal()` has not been called, the signal will remain blocked in this thread.
jpayne@69 104 // Therefore, a signal which arrives before `onSignal()` was called will not be "missed" -- the
jpayne@69 105 // next call to `onSignal()` will receive it. Also, you can control which thread receives a
jpayne@69 106 // process-wide signal by only calling `onSignal()` on that thread's event loop.
jpayne@69 107 //
jpayne@69 108 // The result of waiting on the same signal twice at once is undefined.
jpayne@69 109 //
jpayne@69 110 // WARNING: On MacOS and iOS, `onSignal()` will only see process-level signals, NOT
jpayne@69 111 // thread-specific signals (i.e. not those sent with pthread_kill()). This is a limitation of
jpayne@69 112 // Apple's implementation of kqueue() introduced in MacOS 10.14 which Apple says is not a bug.
jpayne@69 113 // See: https://github.com/libevent/libevent/issues/765 Consider using kj::Executor or
jpayne@69 114 // kj::newPromiseAndCrossThreadFulfiller() for cross-thread communications instead of signals.
jpayne@69 115 // If you must have signals, build KJ and your app with `-DKJ_USE_KQUEUE=0`, which will cause
jpayne@69 116 // KJ to fall back to a generic poll()-based implementation that is less efficient but handles
jpayne@69 117 // thread-specific signals.
jpayne@69 118
jpayne@69 119 static void captureSignal(int signum);
jpayne@69 120 // Arranges for the given signal to be captured and handled via UnixEventPort, so that you may
jpayne@69 121 // then pass it to `onSignal()`. This method is static because it registers a signal handler
jpayne@69 122 // which applies process-wide. If any other threads exist in the process when `captureSignal()`
jpayne@69 123 // is called, you *must* set the signal mask in those threads to block this signal, otherwise
jpayne@69 124 // terrible things will happen if the signal happens to be delivered to those threads. If at
jpayne@69 125 // all possible, call `captureSignal()` *before* creating threads, so that threads you create in
jpayne@69 126 // the future will inherit the proper signal mask.
jpayne@69 127 //
jpayne@69 128 // To un-capture a signal, simply install a different signal handler and then un-block it from
jpayne@69 129 // the signal mask.
jpayne@69 130
jpayne@69 131 static void setReservedSignal(int signum);
jpayne@69 132 // Sets the signal number which `UnixEventPort` reserves for internal use. If your application
jpayne@69 133 // needs to use SIGUSR1, call this at startup (before any calls to `captureSignal()` and before
jpayne@69 134 // constructing a `UnixEventPort`) to offer a different signal.
jpayne@69 135
jpayne@69 136 Timer& getTimer() { return timerImpl; }
jpayne@69 137
jpayne@69 138 Promise<int> onChildExit(Maybe<pid_t>& pid);
jpayne@69 139 // When the given child process exits, resolves to its wait status, as returned by wait(2). You
jpayne@69 140 // will need to use the WIFEXITED() etc. macros to interpret the status code.
jpayne@69 141 //
jpayne@69 142 // You must call onChildExit() immediately after the child is created, before returning to the
jpayne@69 143 // event loop. Otherwise, you may miss the child exit event.
jpayne@69 144 //
jpayne@69 145 // `pid` is a reference to a Maybe<pid_t> which must be non-null at the time of the call. When
jpayne@69 146 // wait() is invoked (and indicates this pid has finished), `pid` will be nulled out. This is
jpayne@69 147 // necessary to avoid a race condition: as soon as the child has been wait()ed, the PID table
jpayne@69 148 // entry is freed and can then be reused. So, if you ever want safely to call `kill()` on the
jpayne@69 149 // PID, it's necessary to know whether it has been wait()ed already. Since the promise's
jpayne@69 150 // .then() continuation may not run immediately, we need a more precise way, hence we null out
jpayne@69 151 // the Maybe.
jpayne@69 152 //
jpayne@69 153 // The caller must NOT null out `pid` on its own unless it cancels the Promise first. If the
jpayne@69 154 // caller decides to cancel the Promise, and `pid` is still non-null after this cancellation,
jpayne@69 155 // then the caller is expected to `waitpid()` on it BEFORE returning to the event loop again.
jpayne@69 156 // Probably, the caller should kill() the child before waiting to avoid a hang. If the caller
jpayne@69 157 // fails to do its own waitpid() before returning to the event loop, the child may become a
jpayne@69 158 // zombie, or may be reaped automatically, depending on the platform -- since the caller does not
jpayne@69 159 // know, the caller cannot try to reap the zombie later.
jpayne@69 160 //
jpayne@69 161 // You must call `kj::UnixEventPort::captureChildExit()` early in your program if you want to use
jpayne@69 162 // `onChildExit()`.
jpayne@69 163 //
jpayne@69 164 // WARNING: Only one UnixEventPort per process is allowed to use onChildExit(). This is because
jpayne@69 165 // child exit is signaled to the process via SIGCHLD, and Unix does not allow the program to
jpayne@69 166 // control which thread receives the signal. (We may fix this in the future by automatically
jpayne@69 167 // coordinating between threads when multiple threads are expecting child exits.)
jpayne@69 168 // WARNING 2: If any UnixEventPort in the process is currently waiting for onChildExit(), then
jpayne@69 169 // *only* that port's thread can safely wait on child processes, even synchronously. This is
jpayne@69 170 // because the thread which used onChildExit() uses wait() to reap children, without specifying
jpayne@69 171 // which child, and therefore it may inadvertently reap children created by other threads.
jpayne@69 172
jpayne@69 173 static void captureChildExit();
jpayne@69 174 // Arranges for child process exit to be captured and handled via UnixEventPort, so that you may
jpayne@69 175 // call `onChildExit()`. Much like `captureSignal()`, this static method must be called early on
jpayne@69 176 // in program startup.
jpayne@69 177 //
jpayne@69 178 // This method may capture the `SIGCHLD` signal. You must not use `captureSignal(SIGCHLD)` nor
jpayne@69 179 // `onSignal(SIGCHLD)` in your own code if you use `captureChildExit()`.
jpayne@69 180
jpayne@69 181 // implements EventPort ------------------------------------------------------
jpayne@69 182 bool wait() override;
jpayne@69 183 bool poll() override;
jpayne@69 184 void wake() const override;
jpayne@69 185
jpayne@69 186 private:
jpayne@69 187 class SignalPromiseAdapter;
jpayne@69 188 class ChildExitPromiseAdapter;
jpayne@69 189
jpayne@69 190 const MonotonicClock& clock;
jpayne@69 191 TimerImpl timerImpl;
jpayne@69 192
jpayne@69 193 #if !KJ_USE_KQUEUE
jpayne@69 194 SignalPromiseAdapter* signalHead = nullptr;
jpayne@69 195 SignalPromiseAdapter** signalTail = &signalHead;
jpayne@69 196
jpayne@69 197 void gotSignal(const siginfo_t& siginfo);
jpayne@69 198 #endif
jpayne@69 199
jpayne@69 200 friend class TimerPromiseAdapter;
jpayne@69 201
jpayne@69 202 #if KJ_USE_EPOLL
jpayne@69 203 sigset_t originalMask;
jpayne@69 204 AutoCloseFd epollFd;
jpayne@69 205 AutoCloseFd eventFd; // Used for cross-thread wakeups.
jpayne@69 206
jpayne@69 207 bool processEpollEvents(struct epoll_event events[], int n);
jpayne@69 208 #elif KJ_USE_KQUEUE
jpayne@69 209 AutoCloseFd kqueueFd;
jpayne@69 210
jpayne@69 211 bool doKqueueWait(struct timespec* timeout);
jpayne@69 212 #else
jpayne@69 213 class PollContext;
jpayne@69 214
jpayne@69 215 FdObserver* observersHead = nullptr;
jpayne@69 216 FdObserver** observersTail = &observersHead;
jpayne@69 217
jpayne@69 218 #if KJ_USE_PIPE_FOR_WAKEUP
jpayne@69 219 AutoCloseFd wakePipeIn;
jpayne@69 220 AutoCloseFd wakePipeOut;
jpayne@69 221 #else
jpayne@69 222 unsigned long long threadId; // actually pthread_t
jpayne@69 223 #endif
jpayne@69 224 #endif
jpayne@69 225
jpayne@69 226 #if !KJ_USE_KQUEUE
jpayne@69 227 struct ChildSet;
jpayne@69 228 Maybe<Own<ChildSet>> childSet;
jpayne@69 229 #endif
jpayne@69 230
jpayne@69 231 static void signalHandler(int, siginfo_t* siginfo, void*) noexcept;
jpayne@69 232 static void registerSignalHandler(int signum);
jpayne@69 233 #if !KJ_USE_EPOLL && !KJ_USE_KQUEUE && !KJ_USE_PIPE_FOR_WAKEUP
jpayne@69 234 static void registerReservedSignal();
jpayne@69 235 #endif
jpayne@69 236 static void ignoreSigpipe();
jpayne@69 237 };
jpayne@69 238
jpayne@69 239 class UnixEventPort::FdObserver: private AsyncObject {
jpayne@69 240 // Object which watches a file descriptor to determine when it is readable or writable.
jpayne@69 241 //
jpayne@69 242 // For listen sockets, "readable" means that there is a connection to accept(). For everything
jpayne@69 243 // else, it means that read() (or recv()) will return data.
jpayne@69 244 //
jpayne@69 245 // The presence of out-of-band data should NOT fire this event. However, the event may
jpayne@69 246 // occasionally fire spuriously (when there is actually no data to read), and one thing that can
jpayne@69 247 // cause such spurious events is the arrival of OOB data on certain platforms whose event
jpayne@69 248 // interfaces fail to distinguish between regular and OOB data (e.g. Mac OSX).
jpayne@69 249 //
jpayne@69 250 // WARNING: The exact behavior of this class differs across systems, since event interfaces
jpayne@69 251 // vary wildly. Be sure to read the documentation carefully and avoid depending on unspecified
jpayne@69 252 // behavior. If at all possible, use the higher-level AsyncInputStream interface instead.
jpayne@69 253
jpayne@69 254 public:
jpayne@69 255 enum Flags {
jpayne@69 256 OBSERVE_READ = 1,
jpayne@69 257 OBSERVE_WRITE = 2,
jpayne@69 258 OBSERVE_URGENT = 4,
jpayne@69 259 OBSERVE_READ_WRITE = OBSERVE_READ | OBSERVE_WRITE
jpayne@69 260 };
jpayne@69 261
jpayne@69 262 FdObserver(UnixEventPort& eventPort, int fd, uint flags);
jpayne@69 263 // Begin watching the given file descriptor for the events selected by `flags` (see `Flags`).
jpayne@69 264 // Only one FdObserver may exist for a given file descriptor at a time.
jpayne@69 265
jpayne@69 266 ~FdObserver() noexcept(false);
jpayne@69 267
jpayne@69 268 KJ_DISALLOW_COPY_AND_MOVE(FdObserver);
jpayne@69 269
jpayne@69 270 Promise<void> whenBecomesReadable();
jpayne@69 271 // Resolves the next time the file descriptor transitions from having no data to read to having
jpayne@69 272 // some data to read.
jpayne@69 273 //
jpayne@69 274 // KJ uses "edge-triggered" event notification whenever possible. As a result, it is an error
jpayne@69 275 // to call this method when there is already data in the read buffer which has been there since
jpayne@69 276 // prior to the last turn of the event loop or prior to creation of the FdObserver. In this case,
jpayne@69 277 // it is unspecified whether the promise will ever resolve -- it depends on the underlying event
jpayne@69 278 // mechanism being used.
jpayne@69 279 //
jpayne@69 280 // In order to avoid this problem, make sure that you call `whenBecomesReadable()`
jpayne@69 281 // only at times when you know the buffer is empty. You know this for sure when one of the
jpayne@69 282 // following happens:
jpayne@69 283 // * read() or recv() fails with EAGAIN or EWOULDBLOCK. (You MUST have non-blocking mode
jpayne@69 284 // enabled on the fd!)
jpayne@69 285 // * The file descriptor is a regular byte-oriented object (like a socket or pipe),
jpayne@69 286 // read() or recv() returns fewer than the number of bytes requested, and `atEndHint()`
jpayne@69 287 // returns false. This can only happen if the buffer is empty but EOF is not reached. (Note,
jpayne@69 288 // though, that for record-oriented file descriptors like Linux's inotify interface, this
jpayne@69 289 // rule does not hold, because it could simply be that the next record did not fit into the
jpayne@69 290 // space available.)
jpayne@69 291 //
jpayne@69 292 // It is an error to call `whenBecomesReadable()` again when the promise returned previously
jpayne@69 293 // has not yet resolved. If you do this, the previous promise may throw an exception.
jpayne@69 294
jpayne@69 295 inline Maybe<bool> atEndHint() { return atEnd; }
jpayne@69 296 // Returns true if the event system has indicated that EOF has been received. There may still
jpayne@69 297 // be data in the read buffer, but once that is gone, there's nothing left.
jpayne@69 298 //
jpayne@69 299 // Returns false if the event system has indicated that EOF had NOT been received as of the
jpayne@69 300 // last turn of the event loop.
jpayne@69 301 //
jpayne@69 302 // Returns nullptr if the event system does not know whether EOF has been reached. In this
jpayne@69 303 // case, the only way to know for sure is to call read() or recv() and check if it returns
jpayne@69 304 // zero.
jpayne@69 305 //
jpayne@69 306 // This hint may be useful as an optimization to avoid an unnecessary system call.
jpayne@69 307
jpayne@69 308 Promise<void> whenBecomesWritable();
jpayne@69 309 // Resolves the next time the file descriptor transitions from having no space available in the
jpayne@69 310 // write buffer to having some space available.
jpayne@69 311 //
jpayne@69 312 // KJ uses "edge-triggered" event notification whenever possible. As a result, it is an error
jpayne@69 313 // to call this method when there is already space in the write buffer which has been there
jpayne@69 314 // since prior to the last turn of the event loop or prior to creation of the FdObserver. In this
jpayne@69 315 // case, it is unspecified whether the promise will ever resolve -- it depends on the underlying
jpayne@69 316 // event mechanism being used.
jpayne@69 317 //
jpayne@69 318 // In order to avoid this problem, make sure that you call `whenBecomesWritable()`
jpayne@69 319 // only at times when you know the buffer is full. You know this for sure when one of the
jpayne@69 320 // following happens:
jpayne@69 321 // * write() or send() fails with EAGAIN or EWOULDBLOCK. (You MUST have non-blocking mode
jpayne@69 322 // enabled on the fd!)
jpayne@69 323 // * write() or send() succeeds but accepts fewer than the number of bytes provided. This can
jpayne@69 324 // only happen if the buffer is full.
jpayne@69 325 //
jpayne@69 326 // It is an error to call `whenBecomesWritable()` again when the promise returned previously
jpayne@69 327 // has not yet resolved. If you do this, the previous promise may throw an exception.
jpayne@69 328
jpayne@69 329 Promise<void> whenUrgentDataAvailable();
jpayne@69 330 // Resolves the next time the file descriptor's read buffer contains "urgent" data.
jpayne@69 331 //
jpayne@69 332 // The conditions for availability of urgent data are specific to the file descriptor's
jpayne@69 333 // underlying implementation.
jpayne@69 334 //
jpayne@69 335 // It is an error to call `whenUrgentDataAvailable()` again when the promise returned previously
jpayne@69 336 // has not yet resolved. If you do this, the previous promise may throw an exception.
jpayne@69 337 //
jpayne@69 338 // WARNING: This has some known weird behavior on macOS. See
jpayne@69 339 // https://github.com/capnproto/capnproto/issues/374.
jpayne@69 340
jpayne@69 341 Promise<void> whenWriteDisconnected();
jpayne@69 342 // Resolves when poll() on the file descriptor reports POLLHUP or POLLERR.
jpayne@69 343
jpayne@69 344 private:
jpayne@69 345 UnixEventPort& eventPort;
jpayne@69 346 int fd;
jpayne@69 347 uint flags;
jpayne@69 348
jpayne@69 349 kj::Maybe<Own<PromiseFulfiller<void>>> readFulfiller;
jpayne@69 350 kj::Maybe<Own<PromiseFulfiller<void>>> writeFulfiller;
jpayne@69 351 kj::Maybe<Own<PromiseFulfiller<void>>> urgentFulfiller;
jpayne@69 352 kj::Maybe<Own<PromiseFulfiller<void>>> hupFulfiller;
jpayne@69 353 // Replaced each time `whenBecomesReadable()` or `whenBecomesWritable()` is called. Reverted to
jpayne@69 354 // null every time an event is fired.
jpayne@69 355
jpayne@69 356 Maybe<bool> atEnd;
jpayne@69 357
jpayne@69 358 #if KJ_USE_KQUEUE
jpayne@69 359 void fire(struct kevent event);
jpayne@69 360 #else
jpayne@69 361 void fire(short events);
jpayne@69 362 #endif
jpayne@69 363
jpayne@69 364 #if !KJ_USE_EPOLL
jpayne@69 365 FdObserver* next;
jpayne@69 366 FdObserver** prev;
jpayne@69 367 // Linked list of observers which currently have a non-null readFulfiller or writeFulfiller.
jpayne@69 368 // If `prev` is null then the observer is not currently in the list.
jpayne@69 369
jpayne@69 370 short getEventMask();
jpayne@69 371 #endif
jpayne@69 372
jpayne@69 373 friend class UnixEventPort;
jpayne@69 374 };
jpayne@69 375
jpayne@69 376 } // namespace kj
jpayne@69 377
jpayne@69 378 KJ_END_HEADER