Commit 59692b0: Make spinlocks faster (take two)

This change is green on the x86 and arm test fleets.

Parent: 02e1cbc
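The common thread in the hunks below is a switch from spin loops that retry an atomic exchange on every iteration to a test-and-test-and-set loop: the exchange is attempted once, and waiters then poll with a plain relaxed load until the lock looks free before retrying the exchange. The following is a minimal, self-contained sketch of that pattern in C11 atomics; the names are invented for illustration and this is not the library code itself.

    #include <stdatomic.h>

    /* Hypothetical test-and-test-and-set spinlock, for illustration only. */
    static atomic_int demo_lock; /* 0 = free, 1 = held */

    static void demo_acquire(void) {
      for (;;) {
        if (!atomic_exchange_explicit(&demo_lock, 1, memory_order_acquire))
          return; /* we flipped 0 -> 1, so we own the lock */
        while (atomic_load_explicit(&demo_lock, memory_order_relaxed)) {
          /* read-only spin: wait until the lock looks free, then retry */
        }
      }
    }

    static void demo_release(void) {
      atomic_store_explicit(&demo_lock, 0, memory_order_release);
    }

Because only the winning exchange writes to the lock, waiting cores spin on a cached shared copy instead of bouncing the cache line between cores, which is where the speedup comes from.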

14 files changed (+122, -79 lines)


libc/intrin/describebacktrace.c

Lines changed: 4 additions & 4 deletions
@@ -24,13 +24,13 @@
 
 #define N 160
 
-static bool IsDangerous(const void *ptr) {
+privileged static bool IsDangerous(const void *ptr) {
   if (_weaken(kisdangerous))
     return _weaken(kisdangerous)(ptr);
   return false;
 }
 
-static char *FormatHex(char *p, unsigned long x) {
+privileged static char *FormatHex(char *p, unsigned long x) {
   int k = x ? (__builtin_clzl(x) ^ 63) + 1 : 1;
   k = (k + 3) & -4;
   while (k > 0)
@@ -39,8 +39,8 @@ static char *FormatHex(char *p, unsigned long x) {
   return p;
 }
 
-dontinstrument const char *(DescribeBacktrace)(char buf[N],
-                                               const struct StackFrame *fr) {
+privileged dontinstrument const char *(
+    DescribeBacktrace)(char buf[N], const struct StackFrame *fr) {
   char *p = buf;
   char *pe = p + N;
   bool gotsome = false;

libc/intrin/iscall.c

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@
 
 // returns true if `p` is preceded by x86 call instruction
 // this is actually impossible to do but we'll do our best
-dontinstrument int __is_call(const unsigned char *p) {
+privileged dontinstrument int __is_call(const unsigned char *p) {
   if (p[-5] == 0xe8)
     return 5;  // call Jvds
   if (p[-2] == 0xff && (p[-1] & 070) == 020)

libc/intrin/maps.c

Lines changed: 56 additions & 22 deletions
@@ -18,13 +18,17 @@
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/intrin/maps.h"
 #include "ape/sections.internal.h"
+#include "libc/calls/state.internal.h"
 #include "libc/dce.h"
+#include "libc/intrin/describebacktrace.h"
 #include "libc/intrin/dll.h"
+#include "libc/intrin/kprintf.h"
 #include "libc/intrin/maps.h"
 #include "libc/runtime/runtime.h"
 #include "libc/runtime/stack.h"
 #include "libc/sysv/consts/auxv.h"
 #include "libc/sysv/consts/prot.h"
+#include "libc/thread/lock.h"
 
 #ifdef __x86_64__
 __static_yoink("_init_maps");
@@ -85,37 +89,67 @@ void __maps_init(void) {
 }
 
 privileged bool __maps_lock(void) {
+  int me;
+  uint64_t word, lock;
   struct CosmoTib *tib;
   if (!__tls_enabled)
     return false;
-  tib = __get_tls_privileged();
-  if (atomic_fetch_add_explicit(&tib->tib_relock_maps, 1, memory_order_relaxed))
-    return true;
-  int backoff = 0;
-  while (atomic_exchange_explicit(&__maps.lock, 1, memory_order_acquire)) {
-    if (backoff < 7) {
-      volatile int i;
-      for (i = 0; i != 1 << backoff; i++) {
-      }
-      backoff++;
-    } else {
-      // STRACE("pthread_delay_np(__maps)");
-#if defined(__GNUC__) && defined(__aarch64__)
-      __asm__ volatile("yield");
-#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
-      __asm__ volatile("pause");
-#endif
+  if (!(tib = __get_tls_privileged()))
+    return false;
+  if (tib->tib_flags & TIB_FLAG_VFORKED)
+    return false;
+  me = atomic_load_explicit(&tib->tib_tid, memory_order_acquire);
+  if (me <= 0)
+    return false;
+  word = atomic_load_explicit(&__maps.lock, memory_order_relaxed);
+  for (;;) {
+    if (MUTEX_OWNER(word) == me) {
+      if (atomic_compare_exchange_weak_explicit(
+              &__maps.lock, &word, MUTEX_INC_DEPTH(word), memory_order_relaxed,
+              memory_order_relaxed))
+        return true;
+      continue;
+    }
+    word = 0;
+    lock = MUTEX_LOCK(word);
+    lock = MUTEX_SET_OWNER(lock, me);
+    if (atomic_compare_exchange_weak_explicit(&__maps.lock, &word, lock,
+                                              memory_order_acquire,
+                                              memory_order_relaxed))
+      return false;
+    for (;;) {
+      word = atomic_load_explicit(&__maps.lock, memory_order_relaxed);
+      if (MUTEX_OWNER(word) == me)
+        break;
+      if (!word)
+        break;
     }
   }
-  return false;
 }
 
 privileged void __maps_unlock(void) {
+  int me;
+  uint64_t word;
   struct CosmoTib *tib;
   if (!__tls_enabled)
     return;
-  tib = __get_tls_privileged();
-  if (atomic_fetch_sub_explicit(&tib->tib_relock_maps, 1,
-                                memory_order_relaxed) == 1)
-    atomic_store_explicit(&__maps.lock, 0, memory_order_release);
+  if (!(tib = __get_tls_privileged()))
+    return;
+  if (tib->tib_flags & TIB_FLAG_VFORKED)
+    return;
+  me = atomic_load_explicit(&tib->tib_tid, memory_order_acquire);
+  if (me <= 0)
+    return;
+  word = atomic_load_explicit(&__maps.lock, memory_order_relaxed);
+  for (;;) {
+    if (MUTEX_DEPTH(word)) {
+      if (atomic_compare_exchange_weak_explicit(
+              &__maps.lock, &word, MUTEX_DEC_DEPTH(word), memory_order_relaxed,
+              memory_order_relaxed))
+        break;
+    }
+    if (atomic_compare_exchange_weak_explicit(
+            &__maps.lock, &word, 0, memory_order_release, memory_order_relaxed))
+      break;
+  }
 }
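Note on the hunk above: __maps.lock is now a single 64-bit word that encodes both an owner thread id and a recursion depth through the MUTEX_* macros from libc/thread/lock.h (the macro definitions are not part of this diff). As a rough sketch only, with a made-up bit layout and made-up names, a recursive spin acquire over such a word looks like this:

    #include <stdatomic.h>
    #include <stdint.h>

    /* Illustration only: owner tid in the high 32 bits, depth in the low 32.
       The real layout used by __maps.lock comes from libc/thread/lock.h. */
    #define DEMO_OWNER(w)   ((int)((w) >> 32))
    #define DEMO_DEPTH(w)   ((uint32_t)(w))
    #define DEMO_WORD(o, d) (((uint64_t)(o) << 32) | (d))

    static _Atomic(uint64_t) demo_word;

    static void demo_recursive_acquire(int me) {
      uint64_t word = atomic_load_explicit(&demo_word, memory_order_relaxed);
      for (;;) {
        if (DEMO_OWNER(word) == me) {
          /* already ours: just bump the recursion depth */
          if (atomic_compare_exchange_weak_explicit(
                  &demo_word, &word, DEMO_WORD(me, DEMO_DEPTH(word) + 1),
                  memory_order_relaxed, memory_order_relaxed))
            return;
          continue;
        }
        word = 0; /* only an unowned word can be claimed */
        if (atomic_compare_exchange_weak_explicit(
                &demo_word, &word, DEMO_WORD(me, 1), memory_order_acquire,
                memory_order_relaxed))
          return;
        while ((word = atomic_load_explicit(&demo_word, memory_order_relaxed))) {
          /* read-only spin until the word looks free again */
        }
      }
    }

Tracking the owner inside the lock word replaces the per-thread tib_relock_maps counter the old code used for reentrancy, which is also why the fork.c hunk below no longer needs to reset that field.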

libc/intrin/maps.h

Lines changed: 1 addition & 1 deletion
@@ -27,8 +27,8 @@ struct Map {
 };
 
 struct Maps {
-  atomic_int lock;
   struct Tree *maps;
+  _Atomic(uint64_t) lock;
   _Atomic(struct Map *) freed;
   size_t count;
   size_t pages;

libc/intrin/pthread_mutex_lock.c

Lines changed: 17 additions & 11 deletions
@@ -31,17 +31,16 @@
 #include "third_party/nsync/futex.internal.h"
 #include "third_party/nsync/mu.h"
 
-static void pthread_mutex_lock_naive(pthread_mutex_t *mutex, uint64_t word) {
+static void pthread_mutex_lock_spin(atomic_int *word) {
   int backoff = 0;
-  uint64_t lock;
   for (;;) {
-    word = MUTEX_UNLOCK(word);
-    lock = MUTEX_LOCK(word);
-    if (atomic_compare_exchange_weak_explicit(&mutex->_word, &word, lock,
-                                              memory_order_acquire,
-                                              memory_order_relaxed))
-      return;
-    backoff = pthread_delay_np(mutex, backoff);
+    if (!atomic_exchange_explicit(word, 1, memory_order_acquire))
+      break;
+    for (;;) {
+      if (!atomic_load_explicit(word, memory_order_relaxed))
+        break;
+      backoff = pthread_delay_np(word, backoff);
+    }
   }
 }
 
@@ -96,7 +95,14 @@ static errno_t pthread_mutex_lock_recursive(pthread_mutex_t *mutex,
       mutex->_pid = __pid;
       return 0;
     }
-    backoff = pthread_delay_np(mutex, backoff);
+    for (;;) {
+      word = atomic_load_explicit(&mutex->_word, memory_order_relaxed);
+      if (MUTEX_OWNER(word) == me)
+        break;
+      if (word == MUTEX_UNLOCK(word))
+        break;
+      backoff = pthread_delay_np(mutex, backoff);
+    }
   }
 }
 
@@ -121,7 +127,7 @@ static errno_t pthread_mutex_lock_impl(pthread_mutex_t *mutex) {
     if (_weaken(nsync_futex_wait_)) {
      pthread_mutex_lock_drepper(&mutex->_futex, MUTEX_PSHARED(word));
    } else {
-      pthread_mutex_lock_naive(mutex, word);
+      pthread_mutex_lock_spin(&mutex->_futex);
    }
    return 0;
  }
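In the non-futex fallback above, waiters now watch the word with relaxed loads and call pthread_delay_np() between polls; as used in these hunks it takes the contended address plus the current backoff counter and returns the next counter. A hypothetical stand-in for such a helper, modeled on the exponential-backoff loop that was deleted from __maps_lock() earlier in this commit, might look like:

    /* Hypothetical backoff helper, for illustration only (the real
       pthread_delay_np() lives elsewhere in the library). */
    static int demo_delay(int backoff) {
      if (backoff < 7) {
        volatile int i;
        for (i = 0; i != 1 << backoff; i++) {
          /* burn a few cycles; the count doubles on every call */
        }
        backoff++;
      } else {
    #if defined(__GNUC__) && defined(__aarch64__)
        __asm__ volatile("yield");
    #elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
        __asm__ volatile("pause");
    #endif
      }
      return backoff;
    }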

libc/intrin/pthread_mutex_trylock.c

Lines changed: 3 additions & 9 deletions
@@ -27,14 +27,8 @@
 #include "third_party/nsync/futex.internal.h"
 #include "third_party/nsync/mu.h"
 
-static errno_t pthread_mutex_trylock_naive(pthread_mutex_t *mutex,
-                                           uint64_t word) {
-  uint64_t lock;
-  word = MUTEX_UNLOCK(word);
-  lock = MUTEX_LOCK(word);
-  if (atomic_compare_exchange_weak_explicit(&mutex->_word, &word, lock,
-                                            memory_order_acquire,
-                                            memory_order_relaxed))
+static errno_t pthread_mutex_trylock_spin(atomic_int *word) {
+  if (!atomic_exchange_explicit(word, 1, memory_order_acquire))
     return 0;
   return EBUSY;
 }
@@ -116,7 +110,7 @@ errno_t pthread_mutex_trylock(pthread_mutex_t *mutex) {
     if (_weaken(nsync_futex_wait_)) {
       return pthread_mutex_trylock_drepper(&mutex->_futex);
     } else {
-      return pthread_mutex_trylock_naive(mutex, word);
+      return pthread_mutex_trylock_spin(&mutex->_futex);
     }
   }
 

libc/intrin/pthread_mutex_unlock.c

Lines changed: 3 additions & 4 deletions
@@ -28,9 +28,8 @@
 #include "third_party/nsync/futex.internal.h"
 #include "third_party/nsync/mu.h"
 
-static void pthread_mutex_unlock_naive(pthread_mutex_t *mutex, uint64_t word) {
-  uint64_t lock = MUTEX_UNLOCK(word);
-  atomic_store_explicit(&mutex->_word, lock, memory_order_release);
+static void pthread_mutex_unlock_spin(atomic_int *word) {
+  atomic_store_explicit(word, 0, memory_order_release);
 }
 
 // see "take 3" algorithm in "futexes are tricky" by ulrich drepper
@@ -102,7 +101,7 @@ errno_t pthread_mutex_unlock(pthread_mutex_t *mutex) {
     if (_weaken(nsync_futex_wake_)) {
       pthread_mutex_unlock_drepper(&mutex->_futex, MUTEX_PSHARED(word));
     } else {
-      pthread_mutex_unlock_naive(mutex, word);
+      pthread_mutex_unlock_spin(&mutex->_futex);
     }
     return 0;
   }
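The spin-path unlock above is a single release store of zero. Paired with the acquire exchange in pthread_mutex_lock_spin(), that is sufficient: writes made inside the critical section happen before the release store, and the next thread's successful acquire exchange synchronizes with it. In sketch form, with invented names:

    #include <stdatomic.h>

    static atomic_int demo_futex; /* 0 = free, 1 = held */
    static int demo_protected;    /* only touched while demo_futex is held */

    static void demo_critical_section(void) {
      while (atomic_exchange_explicit(&demo_futex, 1, memory_order_acquire)) {
        /* spin */
      }
      demo_protected++; /* visible to the next holder via release/acquire */
      atomic_store_explicit(&demo_futex, 0, memory_order_release);
    }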

libc/intrin/pthread_spin_lock.c

Lines changed: 6 additions & 2 deletions
@@ -38,8 +38,12 @@
  * @see pthread_spin_init
  */
 errno_t pthread_spin_lock(pthread_spinlock_t *spin) {
-  while (atomic_exchange_explicit(&spin->_lock, 1, memory_order_acquire)) {
-    pthread_pause_np();
+  for (;;) {
+    if (!atomic_exchange_explicit(&spin->_lock, 1, memory_order_acquire))
+      break;
+    for (;;)
+      if (!atomic_load_explicit(&spin->_lock, memory_order_relaxed))
+        break;
   }
   return 0;
 }
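The public interface is untouched by the hunk above; only the wait loop inside pthread_spin_lock() changed. For reference, a minimal caller still looks like this (standard POSIX usage, not part of the diff):

    #include <pthread.h>
    #include <stdio.h>

    int main(void) {
      pthread_spinlock_t spin;
      pthread_spin_init(&spin, PTHREAD_PROCESS_PRIVATE);
      pthread_spin_lock(&spin); /* now spins read-only while contended */
      puts("in the critical section");
      pthread_spin_unlock(&spin);
      pthread_spin_destroy(&spin);
      return 0;
    }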

libc/proc/fork.c

Lines changed: 0 additions & 1 deletion
@@ -81,7 +81,6 @@ static void _onfork_child(void) {
   _rand64_lock_obj = (pthread_mutex_t)PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
   _pthread_lock_obj = (pthread_mutex_t)PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
   atomic_store_explicit(&__maps.lock, 0, memory_order_relaxed);
-  atomic_store_explicit(&__get_tls()->tib_relock_maps, 0, memory_order_relaxed);
   if (_weaken(_pthread_onfork_child))
     _weaken(_pthread_onfork_child)();
 }

libc/runtime/getsymboltable.c

Lines changed: 23 additions & 21 deletions
@@ -17,6 +17,8 @@
 │ PERFORMANCE OF THIS SOFTWARE.                                                │
 ╚─────────────────────────────────────────────────────────────────────────────*/
 #include "libc/assert.h"
+#include "libc/atomic.h"
+#include "libc/cosmo.h"
 #include "libc/errno.h"
 #include "libc/intrin/promises.h"
 #include "libc/intrin/strace.h"
@@ -27,14 +29,12 @@
 #include "libc/runtime/symbols.internal.h"
 #include "libc/runtime/zipos.internal.h"
 #include "libc/str/str.h"
-#include "libc/thread/thread.h"
 #include "libc/x/x.h"
 #include "libc/zip.internal.h"
 #include "third_party/puff/puff.h"
 
 __static_yoink("__get_symbol");
 
-static pthread_spinlock_t g_lock;
 struct SymbolTable *__symtab;  // for kprintf
 
 static ssize_t GetZipFile(struct Zipos *zipos, const char *name) {
@@ -100,6 +100,25 @@ static struct SymbolTable *GetSymbolTableFromElf(void) {
   }
 }
 
+static void GetSymbolTableInit(void) {
+  struct Zipos *z;
+  int e = errno;
+  if (!__symtab && !__isworker) {
+    if (_weaken(__zipos_get) && (z = _weaken(__zipos_get)())) {
+      if ((__symtab = GetSymbolTableFromZip(z))) {
+        __symtab->names =
+            (uint32_t *)((char *)__symtab + __symtab->names_offset);
+        __symtab->name_base =
+            (char *)((char *)__symtab + __symtab->name_base_offset);
+      }
+    }
+    if (!__symtab) {
+      __symtab = GetSymbolTableFromElf();
+    }
+  }
+  errno = e;
+}
+
 /**
  * Returns symbol table singleton.
  *
@@ -121,24 +140,7 @@ static struct SymbolTable *GetSymbolTableFromElf(void) {
  * @return symbol table, or NULL if not found
  */
 struct SymbolTable *GetSymbolTable(void) {
-  struct Zipos *z;
-  if (pthread_spin_trylock(&g_lock))
-    return 0;
-  int e = errno;
-  if (!__symtab && !__isworker) {
-    if (_weaken(__zipos_get) && (z = _weaken(__zipos_get)())) {
-      if ((__symtab = GetSymbolTableFromZip(z))) {
-        __symtab->names =
-            (uint32_t *)((char *)__symtab + __symtab->names_offset);
-        __symtab->name_base =
-            (char *)((char *)__symtab + __symtab->name_base_offset);
-      }
-    }
-    if (!__symtab) {
-      __symtab = GetSymbolTableFromElf();
-    }
-  }
-  errno = e;
-  pthread_spin_unlock(&g_lock);
+  static atomic_uint once;
+  cosmo_once(&once, GetSymbolTableInit);
   return __symtab;
 }
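With the hunk above, the hand-rolled g_lock spinlock around symbol-table construction is gone: GetSymbolTable() keeps a static atomic_uint flag and lets cosmo_once() run GetSymbolTableInit() exactly once, in the usual call_once fashion. A minimal sketch of the same pattern applied to a hypothetical lazily-built singleton (invented names, same cosmo_once() usage as in the hunk):

    #include "libc/atomic.h"
    #include "libc/cosmo.h"

    static int *g_table;        /* hypothetical lazily-built singleton */
    static atomic_uint g_once;  /* guard word handed to cosmo_once() */

    static void InitTable(void) {
      static int storage[256];
      for (int i = 0; i < 256; i++)
        storage[i] = i;
      g_table = storage;
    }

    int *GetTable(void) {
      cosmo_once(&g_once, InitTable); /* InitTable() runs at most once */
      return g_table;
    }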
