Skip to content

Commit 98c5847

Browse files
committed
Fix fork waiter leak in nsync
This change fixes a bug where nsync waiter objects would leak. It'd mean that long-running programs like runitd would run out of file descriptors on NetBSD where waiter objects have ksem file descriptors. On other OSes this bug is mostly harmless since the worst that can happen with a futex is to leak a little bit of ram. The bug was caused because tib_nsync was sneaking back in after the finalization code had cleared it. This change refactors the thread exiting code to handle nsync teardown appropriately and in making this change I found another issue, which is that user code which is buggy, and tries to exit without joining joinable threads which haven't been detached, would result in a deadlock. That doesn't sound so bad, except the main thread is a joinable thread. So this deadlock would be triggered in ways that put libc at fault. So we now auto-join threads and libc will log a warning to --strace when that happens for any thread
1 parent fd7da58 commit 98c5847

35 files changed

+299
-173
lines changed

libc/intrin/gettid.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
int gettid(void) {
4040
int tid;
4141
if (VERY_LIKELY(__tls_enabled && !__vforked)) {
42-
tid = atomic_load_explicit(&__get_tls()->tib_tid, memory_order_relaxed);
42+
tid = atomic_load_explicit(&__get_tls()->tib_ptid, memory_order_relaxed);
4343
if (VERY_LIKELY(tid > 0))
4444
return tid;
4545
}

libc/intrin/kprintf.greg.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -561,7 +561,7 @@ ABI static size_t kformat(char *b, size_t n, const char *fmt, va_list va) {
561561
tib = __tls_enabled ? __get_tls_privileged() : 0;
562562
if (!(tib && (tib->tib_flags & TIB_FLAG_VFORKED))) {
563563
if (tib) {
564-
x = atomic_load_explicit(&tib->tib_tid, memory_order_relaxed);
564+
x = atomic_load_explicit(&tib->tib_ptid, memory_order_relaxed);
565565
} else {
566566
x = __pid;
567567
}

libc/intrin/maps.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ bool __maps_held(void) {
129129
return __tls_enabled && !(__get_tls()->tib_flags & TIB_FLAG_VFORKED) &&
130130
MUTEX_OWNER(
131131
atomic_load_explicit(&__maps.lock.word, memory_order_relaxed)) ==
132-
atomic_load_explicit(&__get_tls()->tib_tid, memory_order_relaxed);
132+
atomic_load_explicit(&__get_tls()->tib_ptid, memory_order_relaxed);
133133
}
134134

135135
ABI void __maps_lock(void) {
@@ -142,7 +142,7 @@ ABI void __maps_lock(void) {
142142
return;
143143
if (tib->tib_flags & TIB_FLAG_VFORKED)
144144
return;
145-
me = atomic_load_explicit(&tib->tib_tid, memory_order_relaxed);
145+
me = atomic_load_explicit(&tib->tib_ptid, memory_order_relaxed);
146146
if (me <= 0)
147147
return;
148148
word = atomic_load_explicit(&__maps.lock.word, memory_order_relaxed);
@@ -192,7 +192,7 @@ ABI void __maps_unlock(void) {
192192
return;
193193
if (tib->tib_flags & TIB_FLAG_VFORKED)
194194
return;
195-
me = atomic_load_explicit(&tib->tib_tid, memory_order_relaxed);
195+
me = atomic_load_explicit(&tib->tib_ptid, memory_order_relaxed);
196196
if (me <= 0)
197197
return;
198198
word = atomic_load_explicit(&__maps.lock.word, memory_order_relaxed);

libc/intrin/pthread_mutex_lock.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ static errno_t pthread_mutex_lock_recursive(pthread_mutex_t *mutex,
6969
uint64_t word, bool is_trylock) {
7070
uint64_t lock;
7171
int backoff = 0;
72-
int me = atomic_load_explicit(&__get_tls()->tib_tid, memory_order_relaxed);
72+
int me = atomic_load_explicit(&__get_tls()->tib_ptid, memory_order_relaxed);
7373
bool once = false;
7474
for (;;) {
7575
if (MUTEX_OWNER(word) == me) {
@@ -119,7 +119,7 @@ static errno_t pthread_mutex_lock_recursive(pthread_mutex_t *mutex,
119119
static errno_t pthread_mutex_lock_recursive_nsync(pthread_mutex_t *mutex,
120120
uint64_t word,
121121
bool is_trylock) {
122-
int me = atomic_load_explicit(&__get_tls()->tib_tid, memory_order_relaxed);
122+
int me = atomic_load_explicit(&__get_tls()->tib_ptid, memory_order_relaxed);
123123
for (;;) {
124124
if (MUTEX_OWNER(word) == me) {
125125
if (MUTEX_DEPTH(word) < MUTEX_DEPTH_MAX) {

libc/intrin/pthread_mutex_unlock.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ static void pthread_mutex_unlock_drepper(atomic_int *futex, char pshare) {
4444

4545
static errno_t pthread_mutex_unlock_recursive(pthread_mutex_t *mutex,
4646
uint64_t word) {
47-
int me = atomic_load_explicit(&__get_tls()->tib_tid, memory_order_relaxed);
47+
int me = atomic_load_explicit(&__get_tls()->tib_ptid, memory_order_relaxed);
4848
for (;;) {
4949

5050
// we allow unlocking an initialized lock that wasn't locked, but we
@@ -76,7 +76,7 @@ static errno_t pthread_mutex_unlock_recursive(pthread_mutex_t *mutex,
7676
#if PTHREAD_USE_NSYNC
7777
static errno_t pthread_mutex_unlock_recursive_nsync(pthread_mutex_t *mutex,
7878
uint64_t word) {
79-
int me = atomic_load_explicit(&__get_tls()->tib_tid, memory_order_relaxed);
79+
int me = atomic_load_explicit(&__get_tls()->tib_ptid, memory_order_relaxed);
8080
for (;;) {
8181

8282
// we allow unlocking an initialized lock that wasn't locked, but we

libc/intrin/pthread_tid.c

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,25 @@
2121
#include "libc/thread/posixthread.internal.h"
2222
#include "libc/thread/thread.h"
2323

24+
//
25+
// - tib_ptid: always guaranteed to be non-zero in thread itself. on
26+
// some platforms (e.g. xnu) the parent thread and other
27+
// threads may need to wait for this value to be set. this
28+
// is generally the value you want to read to get the tid.
29+
//
30+
// - tib_ctid: starts off as -1. once thread starts, it's set to the
31+
// thread's tid before calling the thread callback. when
32+
// thread is done executing, this is set to zero, and then
33+
// this address is futex woken, in case the parent thread or
34+
// any other thread is waiting on its completion. when a
35+
// thread wants to read its own tid, it shouldn't use this,
36+
// because the thread might need to do things after clearing
37+
// its own tib_ctid (see pthread_exit() for static thread).
38+
//
2439
int _pthread_tid(struct PosixThread *pt) {
2540
int tid = 0;
26-
while (pt && !(tid = atomic_load_explicit(&pt->ptid, memory_order_acquire)))
41+
while (pt && !(tid = atomic_load_explicit(&pt->tib->tib_ptid,
42+
memory_order_acquire)))
2743
pthread_yield_np();
2844
return tid;
2945
}

libc/intrin/wintlsinit.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
1717
│ PERFORMANCE OF THIS SOFTWARE. │
1818
╚─────────────────────────────────────────────────────────────────────────────*/
19+
#include "libc/intrin/atomic.h"
1920
#include "libc/log/libfatal.internal.h"
2021
#include "libc/nt/thread.h"
2122
#include "libc/nt/thunk/msabi.h"
@@ -38,7 +39,9 @@ textwindows dontinstrument void __bootstrap_tls(struct CosmoTib *tib,
3839
tib->tib_ftrace = __ftrace;
3940
tib->tib_sigstack_size = 57344;
4041
tib->tib_sigstack_addr = bp - 57344;
41-
tib->tib_tid = __imp_GetCurrentThreadId();
42+
int tid = __imp_GetCurrentThreadId();
43+
atomic_init(&tib->tib_ptid, tid);
44+
atomic_init(&tib->tib_ctid, tid);
4245
__set_tls_win32(tib);
4346
}
4447

libc/mem/leaks.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ void CheckForMemoryLeaks(void) {
7979

8080
// validate usage of this api
8181
if (_weaken(_pthread_decimate))
82-
_weaken(_pthread_decimate)();
82+
_weaken(_pthread_decimate)(kPosixThreadZombie);
8383
if (!pthread_orphan_np())
8484
kprintf("warning: called CheckForMemoryLeaks() from non-orphaned thread\n");
8585

libc/proc/fork-nt.c

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -465,9 +465,6 @@ textwindows int sys_fork_nt(uint32_t dwCreationFlags) {
465465
// re-apply code morphing for function tracing
466466
if (ftrace_stackdigs)
467467
_weaken(__hook)(_weaken(ftrace_hook), _weaken(GetSymbolTable)());
468-
// notify pthread join
469-
atomic_store_explicit(&_pthread_static.ptid, GetCurrentThreadId(),
470-
memory_order_release);
471468
}
472469
if (rc == -1)
473470
dll_make_first(&__proc.free, &proc->elem);

libc/proc/fork.c

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,6 @@ extern pthread_mutex_t __sig_worker_lock;
5959

6060
void __dlopen_lock(void);
6161
void __dlopen_unlock(void);
62-
void nsync_mu_semaphore_sem_fork_child(void);
6362

6463
// first and last and always
6564
// it is the lord of all locks
@@ -147,7 +146,6 @@ static void fork_parent(void) {
147146
}
148147

149148
static void fork_child(void) {
150-
nsync_mu_semaphore_sem_fork_child();
151149
_pthread_mutex_wipe_np(&__dlopen_lock_obj);
152150
_pthread_mutex_wipe_np(&__rand64_lock_obj);
153151
_pthread_mutex_wipe_np(&__fds_lock_obj);
@@ -204,8 +202,8 @@ int _fork(uint32_t dwCreationFlags) {
204202
struct CosmoTib *tib = __get_tls();
205203
struct PosixThread *pt = (struct PosixThread *)tib->tib_pthread;
206204
tid = IsLinux() || IsXnuSilicon() ? dx : sys_gettid();
207-
atomic_init(&tib->tib_tid, tid);
208-
atomic_init(&pt->ptid, tid);
205+
atomic_init(&tib->tib_ctid, tid);
206+
atomic_init(&tib->tib_ptid, tid);
209207

210208
// tracing and kisdangerous need this lock wiped a little earlier
211209
atomic_init(&__maps.lock.word, 0);
@@ -214,6 +212,11 @@ int _fork(uint32_t dwCreationFlags) {
214212
* it's now safe to call normal functions again
215213
*/
216214

215+
// this wipe must happen fast
216+
void nsync_waiter_wipe_(void);
217+
if (_weaken(nsync_waiter_wipe_))
218+
_weaken(nsync_waiter_wipe_)();
219+
217220
// turn other threads into zombies
218221
// we can't free() them since we're monopolizing all locks
219222
// we assume the operating system already reclaimed system handles

0 commit comments

Comments
 (0)