Skip to content

Commit e939659

Browse files
committed
Fix ordering of pthread_create(pthread_t *thread)
This change fixes a bug where signal_latency_async_test would flake less than 1/1000 of the time. What was happening was that pthread_kill(sender_thr) would return EFAULT. This was because pthread_create() was not storing the thread object pointer in `*thread` until after clone() had returned. So it was actually possible for the main thread to stall after calling clone(), and during that time the receiver would launch and receive a signal from the sender thread, and then fail when it tried to send a pong. At first I thought I'd use a barrier in the test to synchronize thread creation, but I firmly believe that pthread_create() was to blame, and now that's fixed.
1 parent ed6d133 commit e939659

File tree

6 files changed

+57
-63
lines changed

6 files changed

+57
-63
lines changed

libc/intrin/stack.c

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -492,7 +492,7 @@ relegated bool TellOpenbsdThisIsStackMemory(void *addr, size_t size) {
492492

493493
// OpenBSD only permits RSP to occupy memory that's been explicitly
494494
// defined as stack memory, i.e. `lo <= %rsp < hi` must be the case
495-
relegated errno_t FixupCustomStackOnOpenbsd(pthread_attr_t *attr) {
495+
relegated bool FixupCustomStackOnOpenbsd(pthread_attr_t *attr) {
496496

497497
// get interval
498498
uintptr_t lo = (uintptr_t)attr->__stackaddr;
@@ -503,15 +503,11 @@ relegated errno_t FixupCustomStackOnOpenbsd(pthread_attr_t *attr) {
503503
hi = hi & -__pagesize;
504504

505505
// tell os it's stack memory
506-
errno_t olderr = errno;
507-
if (!TellOpenbsdThisIsStackMemory((void *)lo, hi - lo)) {
508-
errno_t err = errno;
509-
errno = olderr;
510-
return err;
511-
}
506+
if (!TellOpenbsdThisIsStackMemory((void *)lo, hi - lo))
507+
return false;
512508

513509
// update attributes with usable stack address
514510
attr->__stackaddr = (void *)lo;
515511
attr->__stacksize = hi - lo;
516-
return 0;
512+
return true;
517513
}

libc/intrin/stack.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ void cosmo_stack_unlock(void);
88
void cosmo_stack_wipe(void);
99

1010
bool TellOpenbsdThisIsStackMemory(void *, size_t);
11-
errno_t FixupCustomStackOnOpenbsd(pthread_attr_t *);
11+
bool FixupCustomStackOnOpenbsd(pthread_attr_t *);
1212

1313
COSMOPOLITAN_C_END_
1414
#endif /* COSMOPOLITAN_LIBC_STACK_H_ */

libc/thread/posixthread.internal.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ forceinline pureconst struct PosixThread *_pthread_self(void) {
128128
}
129129

130130
forceinline void _pthread_ref(struct PosixThread *pt) {
131-
atomic_fetch_add_explicit(&pt->pt_refs, 1, memory_order_acq_rel);
131+
atomic_fetch_add_explicit(&pt->pt_refs, 1, memory_order_relaxed);
132132
}
133133

134134
forceinline void _pthread_unref(struct PosixThread *pt) {

libc/thread/pthread_create.c

Lines changed: 25 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -199,14 +199,12 @@ static errno_t pthread_create_impl(pthread_t *thread,
199199
const pthread_attr_t *attr,
200200
void *(*start_routine)(void *), void *arg,
201201
sigset_t oldsigs) {
202-
int rc, e = errno;
202+
errno_t err;
203203
struct PosixThread *pt;
204204

205205
// create posix thread object
206-
if (!(pt = calloc(1, sizeof(struct PosixThread)))) {
207-
errno = e;
206+
if (!(pt = calloc(1, sizeof(struct PosixThread))))
208207
return EAGAIN;
209-
}
210208
dll_init(&pt->list);
211209
pt->pt_locale = &__global_locale;
212210
pt->pt_start = start_routine;
@@ -215,7 +213,6 @@ static errno_t pthread_create_impl(pthread_t *thread,
215213
// create thread local storage memory
216214
if (!(pt->pt_tls = _mktls(&pt->tib))) {
217215
free(pt);
218-
errno = e;
219216
return EAGAIN;
220217
}
221218

@@ -232,9 +229,9 @@ static errno_t pthread_create_impl(pthread_t *thread,
232229
// caller supplied their own stack
233230
// assume they know what they're doing as much as possible
234231
if (IsOpenbsd()) {
235-
if ((rc = FixupCustomStackOnOpenbsd(&pt->pt_attr))) {
232+
if (!FixupCustomStackOnOpenbsd(&pt->pt_attr)) {
236233
_pthread_free(pt);
237-
return rc;
234+
return EPERM;
238235
}
239236
}
240237
} else {
@@ -259,7 +256,7 @@ static errno_t pthread_create_impl(pthread_t *thread,
259256
if (!(pt->pt_attr.__sigaltstackaddr =
260257
malloc(pt->pt_attr.__sigaltstacksize))) {
261258
_pthread_free(pt);
262-
return errno;
259+
return EAGAIN;
263260
}
264261
pt->pt_flags |= PT_OWNSIGALTSTACK;
265262
}
@@ -282,35 +279,41 @@ static errno_t pthread_create_impl(pthread_t *thread,
282279
memory_order_relaxed);
283280
break;
284281
default:
285-
_pthread_free(pt);
286-
return EINVAL;
282+
// pthread_attr_setdetachstate() makes this impossible
283+
__builtin_unreachable();
287284
}
288285

286+
// if pthread_attr_setdetachstate() was used then it's possible for
287+
// the `pt` object to be freed before this clone call has returned!
288+
atomic_store_explicit(&pt->pt_refs, 1, memory_order_relaxed);
289+
289290
// add thread to global list
290291
// we add it to the beginning since zombies go at the end
291292
_pthread_lock();
292293
dll_make_first(&_pthread_list, &pt->list);
293294
_pthread_unlock();
294295

295-
// if pthread_attr_setdetachstate() was used then it's possible for
296-
// the `pt` object to be freed before this clone call has returned!
297-
_pthread_ref(pt);
296+
// we don't normally do this, but it's important to write the result
297+
// memory before spawning the thread, so it's visible to the threads
298+
*thread = (pthread_t)pt;
298299

299300
// launch PosixThread(pt) in new thread
300-
if ((rc = clone(
301+
if ((err = clone(
301302
PosixThread, pt->pt_attr.__stackaddr, pt->pt_attr.__stacksize,
302303
CLONE_VM | CLONE_THREAD | CLONE_FS | CLONE_FILES | CLONE_SIGHAND |
303304
CLONE_SYSVSEM | CLONE_SETTLS | CLONE_PARENT_SETTID |
304305
CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID,
305306
pt, &pt->tib->tib_ptid, __adj_tls(pt->tib), &pt->tib->tib_ctid))) {
307+
*thread = 0; // posix doesn't require we do this
306308
_pthread_lock();
307309
dll_remove(&_pthread_list, &pt->list);
308310
_pthread_unlock();
309311
_pthread_free(pt);
310-
return rc;
312+
if (err == ENOMEM)
313+
err = EAGAIN;
314+
return err;
311315
}
312316

313-
*thread = (pthread_t)pt;
314317
return 0;
315318
}
316319

@@ -359,7 +362,7 @@ static const char *DescribeHandle(char buf[12], errno_t err, pthread_t *th) {
359362
* └──────────────┘
360363
*
361364
* @param thread is used to output the thread id upon success, which
362-
* must be non-null
365+
* must be non-null; upon failure, its value is undefined
363366
* @param attr points to launch configuration, or may be null
364367
* to use sensible defaults; it must be initialized using
365368
* pthread_attr_init()
@@ -375,14 +378,18 @@ static const char *DescribeHandle(char buf[12], errno_t err, pthread_t *th) {
375378
errno_t pthread_create(pthread_t *thread, const pthread_attr_t *attr,
376379
void *(*start_routine)(void *), void *arg) {
377380
errno_t err;
381+
errno_t olderr = errno;
378382
_pthread_decimate(kPosixThreadZombie);
379383
BLOCK_SIGNALS;
380384
err = pthread_create_impl(thread, attr, start_routine, arg, _SigMask);
381385
ALLOW_SIGNALS;
382386
STRACE("pthread_create([%s], %p, %t, %p) → %s",
383387
DescribeHandle(alloca(12), err, thread), attr, start_routine, arg,
384388
DescribeErrno(err));
385-
if (!err)
389+
if (!err) {
386390
_pthread_unref(*(struct PosixThread **)thread);
391+
} else {
392+
errno = olderr;
393+
}
387394
return err;
388395
}

test/posix/signal_latency_async_test.c

Lines changed: 15 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,10 @@ void receiver_signal_handler(int signo) {
4040
}
4141

4242
void *sender_func(void *arg) {
43-
4443
for (int i = 0; i < ITERATIONS; i++) {
4544

4645
// Wait a bit sometimes
47-
if (rand() % 2 == 1) {
46+
if (rand() % 2) {
4847
volatile unsigned v = 0;
4948
for (;;)
5049
if (++v == 4000)
@@ -67,32 +66,25 @@ void *sender_func(void *arg) {
6766
}
6867

6968
void *receiver_func(void *arg) {
70-
71-
// Wait for asynchronous signals
72-
for (;;) {
69+
static int iteration = 0;
70+
do {
71+
// wait for signal handler to be called
7372
if (atomic_exchange_explicit(&receiver_got_signal, 0,
7473
memory_order_acq_rel)) {
74+
75+
// record received time
7576
struct timespec receive_time;
7677
clock_gettime(CLOCK_MONOTONIC, &receive_time);
77-
7878
long sec_diff = receive_time.tv_sec - send_time.tv_sec;
7979
long nsec_diff = receive_time.tv_nsec - send_time.tv_nsec;
8080
double latency_ns = sec_diff * 1e9 + nsec_diff;
81+
latencies[iteration++] = latency_ns;
8182

82-
static int iteration = 0;
83-
if (iteration < ITERATIONS)
84-
latencies[iteration++] = latency_ns;
85-
86-
// Pong sender
83+
// pong sender
8784
if (pthread_kill(sender_thread, SIGUSR2))
8885
exit(2);
89-
90-
// Exit if done
91-
if (iteration >= ITERATIONS)
92-
pthread_exit(0);
9386
}
94-
}
95-
87+
} while (iteration < ITERATIONS);
9688
return 0;
9789
}
9890

@@ -108,11 +100,7 @@ int compare(const void *a, const void *b) {
108100

109101
int main() {
110102

111-
// TODO(jart): fix flakes
112-
if (1)
113-
return 0;
114-
115-
// Install signal handlers
103+
// install handlers
116104
struct sigaction sa;
117105
sa.sa_handler = receiver_signal_handler;
118106
sa.sa_flags = 0;
@@ -121,27 +109,27 @@ int main() {
121109
sa.sa_handler = sender_signal_handler;
122110
sigaction(SIGUSR2, &sa, 0);
123111

124-
// Create receiver thread first
112+
// create receiver thread first
125113
if (pthread_create(&receiver_thread, 0, receiver_func, 0))
126114
exit(11);
127115

128-
// Create sender thread
116+
// create sender thread
129117
if (pthread_create(&sender_thread, 0, sender_func, 0))
130118
exit(12);
131119

132-
// Wait for threads to finish
120+
// wait for threads to finish
133121
if (pthread_join(sender_thread, 0))
134122
exit(13);
135123
if (pthread_join(receiver_thread, 0))
136124
exit(14);
137125

138-
// Compute mean latency
126+
// compute mean latency
139127
double total_latency = 0;
140128
for (int i = 0; i < ITERATIONS; i++)
141129
total_latency += latencies[i];
142130
double mean_latency = total_latency / ITERATIONS;
143131

144-
// Sort latencies to compute percentiles
132+
// sort latencies to compute percentiles
145133
qsort(latencies, ITERATIONS, sizeof(double), compare);
146134

147135
double p50 = latencies[(int)(0.50 * ITERATIONS)];

tool/scripts/flakes

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,20 @@ import concurrent.futures
66
from collections import Counter
77
from typing import List, Dict, Tuple
88

9-
NUM_PARALLEL = int(os.cpu_count() * 1.5)
9+
NUM_PARALLEL = int(os.cpu_count() * 20)
1010

11-
def find_test_files(root_dir: str) -> List[str]:
11+
def find_test_files(root: str) -> List[str]:
1212
"""Find all executable files ending with _test recursively."""
1313
test_files = []
14-
for root, _, files in os.walk(root_dir):
15-
for file in files:
16-
if file.endswith('_test'):
17-
file_path = os.path.join(root, file)
18-
if os.access(file_path, os.X_OK):
19-
test_files.append(file_path)
14+
if os.path.isdir(root):
15+
for root, _, files in os.walk(root):
16+
for file in files:
17+
if file.endswith('_test'):
18+
file_path = os.path.join(root, file)
19+
if os.access(file_path, os.X_OK):
20+
test_files.append(file_path)
21+
elif root.endswith('_test'):
22+
test_files.append(root)
2023
return test_files
2124

2225
def run_single_test(test_path: str) -> int:

0 commit comments

Comments
 (0)