Skip to content

Commit 2f48a02

Browse files
committed
Make recursive mutexes faster
Recursive mutexes now go as fast as normal mutexes. The tradeoff is that they are no longer safe to use in signal handlers. However, you can still have signal-safe mutexes if you set your mutex to be both recursive and pshared. You can also make functions that use recursive mutexes signal safe by using sigprocmask to ensure recursion doesn't happen due to any signal handler. The impact of this change is that, on Windows, many functions which edit the file descriptor table rely on recursive mutexes, e.g. open(). If you develop your app so it uses pread() and pwrite(), then your app should go very fast when performing a heavily multithreaded and contended workload. For example, when scaling to 40+ cores, *NSYNC mutexes can go as much as 1000x faster (in CPU time) than the naive recursive lock implementation. Now recursive mutexes will use *NSYNC under the hood when it's possible to do so.
1 parent 58d252f commit 2f48a02

37 files changed

+2684
-2209
lines changed

libc/calls/clock_nanosleep.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@
5858
* @param clock may be
5959
* - `CLOCK_REALTIME`
6060
* - `CLOCK_MONOTONIC`
61+
* - `CLOCK_REALTIME_COARSE` but is likely to sleep negative time
62+
* - `CLOCK_MONOTONIC_COARSE` but is likely to sleep negative time
6163
* @param flags can be 0 for relative and `TIMER_ABSTIME` for absolute
6264
* @param req can be a relative or absolute time, depending on `flags`
6365
* @param rem shall be updated with the remainder of unslept time when

libc/intrin/pthread_mutex_lock.c

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,37 @@ static errno_t pthread_mutex_lock_recursive(pthread_mutex_t *mutex,
111111
}
112112
}
113113

114+
#if PTHREAD_USE_NSYNC
115+
// Acquires a recursive or error-checking mutex, using the *NSYNC lock
// stored in `mutex->_nsyncx` for the actual acquisition.
//
// @param mutex is the mutex to acquire
// @param word is the expected current value of `mutex->_word`
// @return 0 on success, EAGAIN if the recursion depth has already
//     reached MUTEX_DEPTH_MAX, or EDEADLK if the calling thread
//     already owns an error-checking mutex
static errno_t pthread_mutex_lock_recursive_nsync(pthread_mutex_t *mutex,
116+
uint64_t word) {
117+
int me = gettid();
118+
for (;;) {
119+
// this thread already owns the lock, so only the depth may change
if (MUTEX_OWNER(word) == me) {
120+
if (MUTEX_TYPE(word) != PTHREAD_MUTEX_ERRORCHECK) {
121+
if (MUTEX_DEPTH(word) < MUTEX_DEPTH_MAX) {
122+
// bump the depth; a failed (or spurious) compare-exchange refreshes
// `word` with the current value, so loop around and re-evaluate
if (atomic_compare_exchange_weak_explicit(
123+
&mutex->_word, &word, MUTEX_INC_DEPTH(word),
124+
memory_order_relaxed, memory_order_relaxed))
125+
return 0;
126+
continue;
127+
} else {
128+
return EAGAIN;
129+
}
130+
} else {
131+
// relocking an error-checking mutex from its owner is an error
return EDEADLK;
132+
}
133+
}
134+
// not the owner: acquire the underlying nsync mutex first
_weaken(nsync_mu_lock)((nsync_mu *)mutex->_nsyncx);
135+
// then rebuild the word as locked and owned by this thread
// NOTE(review): MUTEX_UNLOCK presumably clears stale lock/owner/depth
// bits before they are set again -- confirm against the macro definitions
word = MUTEX_UNLOCK(word);
136+
word = MUTEX_LOCK(word);
137+
word = MUTEX_SET_OWNER(word, me);
138+
mutex->_word = word;
139+
// record the process id that took the lock
mutex->_pid = __pid;
140+
return 0;
141+
}
142+
}
143+
#endif
144+
114145
static errno_t pthread_mutex_lock_impl(pthread_mutex_t *mutex) {
115146
uint64_t word;
116147

@@ -141,8 +172,17 @@ static errno_t pthread_mutex_lock_impl(pthread_mutex_t *mutex) {
141172
return 0;
142173
}
143174

144-
// handle recursive and error checking mutexes
175+
// handle recursive and error checking mutexes
176+
#if PTHREAD_USE_NSYNC
177+
if (_weaken(nsync_mu_lock) &&
178+
MUTEX_PSHARED(word) == PTHREAD_PROCESS_PRIVATE) {
179+
return pthread_mutex_lock_recursive_nsync(mutex, word);
180+
} else {
181+
return pthread_mutex_lock_recursive(mutex, word);
182+
}
183+
#else
145184
return pthread_mutex_lock_recursive(mutex, word);
185+
#endif
146186
}
147187

148188
/**

libc/intrin/pthread_mutex_trylock.c

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,38 @@ static errno_t pthread_mutex_trylock_recursive(pthread_mutex_t *mutex,
7474
}
7575
}
7676

77+
// Attempts to acquire a recursive or error-checking mutex without
// blocking, using the *NSYNC lock stored in `mutex->_nsyncx`.
//
// NOTE(review): the callsite in pthread_mutex_trylock() is wrapped in
// `#if PTHREAD_USE_NSYNC`, but this definition is not -- confirm whether
// it should be guarded too.
//
// @param mutex is the mutex to acquire
// @param word is the expected current value of `mutex->_word`
// @return 0 on success, EBUSY if the nsync trylock did not succeed,
//     EAGAIN if the recursion depth has already reached
//     MUTEX_DEPTH_MAX, or EDEADLK if the calling thread already owns
//     an error-checking mutex
static errno_t pthread_mutex_trylock_recursive_nsync(pthread_mutex_t *mutex,
78+
uint64_t word) {
79+
int me = gettid();
80+
for (;;) {
81+
// this thread already owns the lock, so only the depth may change
if (MUTEX_OWNER(word) == me) {
82+
if (MUTEX_TYPE(word) != PTHREAD_MUTEX_ERRORCHECK) {
83+
if (MUTEX_DEPTH(word) < MUTEX_DEPTH_MAX) {
84+
// bump the depth; a failed (or spurious) compare-exchange refreshes
// `word` with the current value, so loop around and re-evaluate
if (atomic_compare_exchange_weak_explicit(
85+
&mutex->_word, &word, MUTEX_INC_DEPTH(word),
86+
memory_order_relaxed, memory_order_relaxed))
87+
return 0;
88+
continue;
89+
} else {
90+
return EAGAIN;
91+
}
92+
} else {
93+
return EDEADLK;
94+
}
95+
}
96+
// not the owner: try the underlying nsync mutex exactly once
if (_weaken(nsync_mu_trylock)((nsync_mu *)mutex->_nsyncx)) {
97+
// got it: rebuild the word as locked and owned by this thread
word = MUTEX_UNLOCK(word);
98+
word = MUTEX_LOCK(word);
99+
word = MUTEX_SET_OWNER(word, me);
100+
mutex->_word = word;
101+
// record the process id that took the lock
mutex->_pid = __pid;
102+
return 0;
103+
} else {
104+
return EBUSY;
105+
}
106+
}
107+
}
108+
77109
/**
78110
* Attempts acquiring lock.
79111
*
@@ -119,5 +151,14 @@ errno_t pthread_mutex_trylock(pthread_mutex_t *mutex) {
119151
}
120152

121153
// handle recursive and error checking mutexes
154+
#if PTHREAD_USE_NSYNC
155+
if (_weaken(nsync_mu_trylock) &&
156+
MUTEX_PSHARED(word) == PTHREAD_PROCESS_PRIVATE) {
157+
return pthread_mutex_trylock_recursive_nsync(mutex, word);
158+
} else {
159+
return pthread_mutex_trylock_recursive(mutex, word);
160+
}
161+
#else
122162
return pthread_mutex_trylock_recursive(mutex, word);
163+
#endif
123164
}

libc/intrin/pthread_mutex_unlock.c

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
│ PERFORMANCE OF THIS SOFTWARE. │
1818
╚─────────────────────────────────────────────────────────────────────────────*/
1919
#include "libc/calls/calls.h"
20+
#include "libc/calls/state.internal.h"
2021
#include "libc/dce.h"
2122
#include "libc/errno.h"
2223
#include "libc/intrin/atomic.h"
@@ -69,6 +70,35 @@ static errno_t pthread_mutex_unlock_recursive(pthread_mutex_t *mutex,
6970
}
7071
}
7172

73+
#if PTHREAD_USE_NSYNC
74+
// Releases a recursive or error-checking mutex whose underlying lock
// is the *NSYNC mutex stored in `mutex->_nsyncx`.
//
// @param mutex is the mutex to release
// @param word is the expected current value of `mutex->_word`
// @return 0 on success, or EPERM if the mutex is owned by another
//     thread or was locked under a different process id
static errno_t pthread_mutex_unlock_recursive_nsync(pthread_mutex_t *mutex,
75+
uint64_t word) {
76+
int me = gettid();
77+
for (;;) {
78+
79+
// we allow unlocking an initialized lock that wasn't locked, but we
80+
// don't allow unlocking a lock held by another thread, or unlocking
81+
// recursive locks from a forked child, since it should be re-init'd
82+
if (MUTEX_OWNER(word) && (MUTEX_OWNER(word) != me || mutex->_pid != __pid))
83+
return EPERM;
84+
85+
// check if this is a nested lock with signal safety
// (a nonzero depth is only decremented; the nsync lock stays held.
// a failed compare-exchange refreshes `word`, so the checks rerun)
86+
if (MUTEX_DEPTH(word)) {
87+
if (atomic_compare_exchange_strong_explicit(
88+
&mutex->_word, &word, MUTEX_DEC_DEPTH(word), memory_order_relaxed,
89+
memory_order_relaxed))
90+
return 0;
91+
continue;
92+
}
93+
94+
// actually unlock the mutex
// the word is cleared before the underlying nsync mutex is released
// NOTE(review): when the owner field is zero this path still calls
// nsync_mu_unlock -- confirm nsync tolerates unlocking an unheld mutex
95+
mutex->_word = MUTEX_UNLOCK(word);
96+
_weaken(nsync_mu_unlock)((nsync_mu *)mutex->_nsyncx);
97+
return 0;
98+
}
99+
}
100+
#endif
101+
72102
/**
73103
* Releases mutex.
74104
*
@@ -81,6 +111,11 @@ static errno_t pthread_mutex_unlock_recursive(pthread_mutex_t *mutex,
81111
errno_t pthread_mutex_unlock(pthread_mutex_t *mutex) {
82112
uint64_t word;
83113

114+
if (__vforked) {
115+
LOCKTRACE("skipping pthread_mutex_lock(%t) due to vfork", mutex);
116+
return 0;
117+
}
118+
84119
LOCKTRACE("pthread_mutex_unlock(%t)", mutex);
85120

86121
// get current state of lock
@@ -111,5 +146,14 @@ errno_t pthread_mutex_unlock(pthread_mutex_t *mutex) {
111146
}
112147

113148
// handle recursive and error checking mutexes
149+
#if PTHREAD_USE_NSYNC
150+
if (_weaken(nsync_mu_unlock) &&
151+
MUTEX_PSHARED(word) == PTHREAD_PROCESS_PRIVATE) {
152+
return pthread_mutex_unlock_recursive_nsync(mutex, word);
153+
} else {
154+
return pthread_mutex_unlock_recursive(mutex, word);
155+
}
156+
#else
114157
return pthread_mutex_unlock_recursive(mutex, word);
158+
#endif
115159
}

libc/intrin/pthread_mutexattr_settype.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
*
2626
* @param type can be one of
2727
* - `PTHREAD_MUTEX_NORMAL`
28-
* - `PTHREAD_MUTEX_DEFAULT`
2928
* - `PTHREAD_MUTEX_RECURSIVE`
3029
* - `PTHREAD_MUTEX_ERRORCHECK`
3130
* @return 0 on success, or error on failure

libc/intrin/reservefd.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,10 @@
1818
╚─────────────────────────────────────────────────────────────────────────────*/
1919
#include "libc/calls/internal.h"
2020
#include "libc/calls/state.internal.h"
21-
#include "libc/intrin/fds.h"
2221
#include "libc/intrin/atomic.h"
2322
#include "libc/intrin/cmpxchg.h"
2423
#include "libc/intrin/extend.h"
24+
#include "libc/intrin/fds.h"
2525
#include "libc/macros.h"
2626
#include "libc/runtime/memtrack.internal.h"
2727
#include "libc/str/str.h"
@@ -47,7 +47,7 @@ int __ensurefds_unlocked(int fd) {
4747

4848
/**
4949
* Grows file descriptor array memory if needed.
50-
* @asyncsignalsafe
50+
* @asyncsignalsafe if signals are blocked
5151
*/
5252
int __ensurefds(int fd) {
5353
__fds_lock();
@@ -82,7 +82,7 @@ int __reservefd_unlocked(int start) {
8282

8383
/**
8484
* Finds open file descriptor slot.
85-
* @asyncsignalsafe
85+
* @asyncsignalsafe if signals are blocked
8686
*/
8787
int __reservefd(int start) {
8888
int fd;

libc/sock/socketpair-nt.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
╚─────────────────────────────────────────────────────────────────────────────*/
1919
#include "libc/calls/internal.h"
2020
#include "libc/calls/state.internal.h"
21+
#include "libc/calls/struct/sigset.internal.h"
2122
#include "libc/calls/syscall_support-nt.internal.h"
2223
#include "libc/nt/createfile.h"
2324
#include "libc/nt/enum/accessmask.h"
@@ -33,7 +34,8 @@
3334
#include "libc/sysv/errfuns.h"
3435
#ifdef __x86_64__
3536

36-
textwindows int sys_socketpair_nt(int family, int type, int proto, int sv[2]) {
37+
textwindows static int sys_socketpair_nt_impl(int family, int type, int proto,
38+
int sv[2]) {
3739
uint32_t mode;
3840
int64_t hpipe, h1;
3941
char16_t pipename[64];
@@ -111,4 +113,12 @@ textwindows int sys_socketpair_nt(int family, int type, int proto, int sv[2]) {
111113
return rc;
112114
}
113115

116+
// Public entry point for socketpair() on Windows.
//
// Delegates to sys_socketpair_nt_impl() between BLOCK_SIGNALS and
// ALLOW_SIGNALS, then returns its result unchanged.
// NOTE(review): presumably the macros mask signals so a handler can't
// re-enter the recursive fd-table locks taken by the impl -- confirm
//
// @param family, type, proto, sv are forwarded as-is to the impl
// @return whatever sys_socketpair_nt_impl() returns
textwindows int sys_socketpair_nt(int family, int type, int proto, int sv[2]) {
117+
int rc;
118+
BLOCK_SIGNALS;
119+
rc = sys_socketpair_nt_impl(family, type, proto, sv);
120+
ALLOW_SIGNALS;
121+
return rc;
122+
}
123+
114124
#endif /* __x86_64__ */

libc/thread/pthread_cond_init.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
1717
│ PERFORMANCE OF THIS SOFTWARE. │
1818
╚─────────────────────────────────────────────────────────────────────────────*/
19+
#include "libc/dce.h"
1920
#include "libc/sysv/consts/clock.h"
2021
#include "libc/thread/thread.h"
2122

@@ -29,8 +30,11 @@ errno_t pthread_cond_init(pthread_cond_t *cond,
2930
const pthread_condattr_t *attr) {
3031
*cond = (pthread_cond_t){0};
3132
if (attr) {
33+
cond->_footek = IsXnuSilicon() || attr->_pshared;
3234
cond->_pshared = attr->_pshared;
3335
cond->_clock = attr->_clock;
36+
} else {
37+
cond->_footek = IsXnuSilicon();
3438
}
3539
return 0;
3640
}

libc/thread/pthread_cond_signal.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,14 @@
4242
errno_t pthread_cond_signal(pthread_cond_t *cond) {
4343

4444
#if PTHREAD_USE_NSYNC
45+
// do nothing if pthread_cond_timedwait() hasn't been called yet
46+
// this is because we don't know for certain if nsync is safe
47+
if (!atomic_load_explicit(&cond->_waited, memory_order_acquire))
48+
return 0;
49+
4550
// favor *NSYNC if this is a process private condition variable
4651
// if using Mike Burrows' code isn't possible, use a naive impl
47-
if (!cond->_pshared && !IsXnuSilicon()) {
52+
if (!cond->_footek) {
4853
nsync_cv_signal((nsync_cv *)cond);
4954
return 0;
5055
}

libc/thread/pthread_cond_timedwait.c

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "libc/calls/cp.internal.h"
2121
#include "libc/dce.h"
2222
#include "libc/errno.h"
23+
#include "libc/intrin/atomic.h"
2324
#include "libc/sysv/consts/clock.h"
2425
#include "libc/thread/lock.h"
2526
#include "libc/thread/posixthread.internal.h"
@@ -116,17 +117,30 @@ errno_t pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex,
116117
MUTEX_OWNER(muword) != gettid())
117118
return EPERM;
118119

119-
// if condition variable is shared then mutex must be too
120-
if (cond->_pshared)
121-
if (MUTEX_PSHARED(muword) != PTHREAD_PROCESS_SHARED)
120+
#if PTHREAD_USE_NSYNC
121+
// the first time pthread_cond_timedwait() is called we learn if the
122+
// associated mutex is normal and private. that means *NSYNC is safe
123+
// this decision is permanent. you can't use a recursive mutex later
124+
if (!atomic_load_explicit(&cond->_waited, memory_order_acquire)) {
125+
if (!cond->_footek)
126+
if (MUTEX_TYPE(muword) != PTHREAD_MUTEX_NORMAL ||
127+
MUTEX_PSHARED(muword) != PTHREAD_PROCESS_PRIVATE)
128+
cond->_footek = true;
129+
atomic_store_explicit(&cond->_waited, true, memory_order_release);
130+
} else if (!cond->_footek) {
131+
if (MUTEX_TYPE(muword) != PTHREAD_MUTEX_NORMAL ||
132+
MUTEX_PSHARED(muword) != PTHREAD_PROCESS_PRIVATE)
122133
return EINVAL;
134+
}
135+
#endif
123136

137+
// now perform the actual wait
124138
errno_t err;
125139
BEGIN_CANCELATION_POINT;
126140
#if PTHREAD_USE_NSYNC
127141
// favor *NSYNC if this is a process private condition variable
128142
// if using Mike Burrows' code isn't possible, use a naive impl
129-
if (!cond->_pshared && !IsXnuSilicon()) {
143+
if (!cond->_footek) {
130144
err = nsync_cv_wait_with_deadline(
131145
(nsync_cv *)cond, (nsync_mu *)mutex, cond->_clock,
132146
abstime ? *abstime : nsync_time_no_deadline, 0);

0 commit comments

Comments
 (0)