Skip to content

Commit 0b3c81d

Browse files
committed
Make fork() go 30% faster
This change makes fork() go nearly as fast as sys_fork() on UNIX. As for Windows, this change shaves about 4-5ms off fork() + wait() latency. This is accomplished by using WriteProcessMemory() from the parent process to set up the address space of a suspended process, which is better than using a pipe.
1 parent 98c5847 commit 0b3c81d

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

44 files changed

+760
-640
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -135,7 +135,7 @@ ARCH = aarch64
135135
HOSTS ?= pi pi5 studio freebsdarm
136136
else
137137
ARCH = x86_64
138-
HOSTS ?= freebsd rhel7 xnu openbsd netbsd win10
138+
HOSTS ?= freebsd rhel7 xnu openbsd netbsd win10 luna
139139
endif
140140

141141
ZIPOBJ_FLAGS += -a$(ARCH)

libc/intrin/describemapping.c

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -17,19 +17,21 @@
1717
│ PERFORMANCE OF THIS SOFTWARE. │
1818
╚─────────────────────────────────────────────────────────────────────────────*/
1919
#include "libc/intrin/describeflags.h"
20+
#include "libc/intrin/maps.h"
2021
#include "libc/runtime/memtrack.internal.h"
2122
#include "libc/sysv/consts/map.h"
2223
#include "libc/sysv/consts/prot.h"
2324

2425
static char DescribeMapType(int flags) {
2526
switch (flags & MAP_TYPE) {
2627
case MAP_FILE:
28+
if (flags & MAP_NOFORK)
29+
return 'i'; // executable image
2730
return '-';
2831
case MAP_PRIVATE:
2932
if (flags & MAP_NOFORK)
30-
return 'P';
31-
else
32-
return 'p';
33+
return 'w'; // windows memory
34+
return 'p';
3335
case MAP_SHARED:
3436
return 's';
3537
default:

libc/intrin/dlopen.c

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919
#include "libc/thread/posixthread.internal.h"
2020
#include "libc/thread/thread.h"
2121

22-
pthread_mutex_t __dlopen_lock_obj = PTHREAD_MUTEX_INITIALIZER;
22+
static pthread_mutex_t __dlopen_lock_obj = PTHREAD_MUTEX_INITIALIZER;
2323

2424
void __dlopen_lock(void) {
2525
_pthread_mutex_lock(&__dlopen_lock_obj);
@@ -28,3 +28,7 @@ void __dlopen_lock(void) {
2828
void __dlopen_unlock(void) {
2929
_pthread_mutex_unlock(&__dlopen_lock_obj);
3030
}
31+
32+
void __dlopen_wipe(void) {
33+
_pthread_mutex_wipe_np(&__dlopen_lock_obj);
34+
}

libc/intrin/localtime_lock.c

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919
#include "libc/thread/posixthread.internal.h"
2020
#include "third_party/tz/lock.h"
2121

22-
pthread_mutex_t __localtime_lock_obj = PTHREAD_MUTEX_INITIALIZER;
22+
static pthread_mutex_t __localtime_lock_obj = PTHREAD_MUTEX_INITIALIZER;
2323

2424
void __localtime_lock(void) {
2525
_pthread_mutex_lock(&__localtime_lock_obj);
@@ -28,3 +28,7 @@ void __localtime_lock(void) {
2828
void __localtime_unlock(void) {
2929
_pthread_mutex_unlock(&__localtime_lock_obj);
3030
}
31+
32+
void __localtime_wipe(void) {
33+
_pthread_mutex_wipe_np(&__localtime_lock_obj);
34+
}

libc/intrin/maps.c

Lines changed: 21 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@
3030
#include "libc/nexgen32e/rdtsc.h"
3131
#include "libc/runtime/runtime.h"
3232
#include "libc/runtime/stack.h"
33+
#include "libc/sysv/consts/map.h"
3334
#include "libc/sysv/consts/prot.h"
3435
#include "libc/thread/lock.h"
3536
#include "libc/thread/tls.h"
@@ -40,10 +41,6 @@ __static_yoink("_init_maps");
4041

4142
#define ABI privileged optimizespeed
4243

43-
// take great care if you enable this
44-
// especially if you're using --ftrace too
45-
#define DEBUG_MAPS_LOCK 0
46-
4744
struct Maps __maps;
4845

4946
void __maps_add(struct Map *map) {
@@ -61,14 +58,18 @@ void __maps_stack(char *stackaddr, int pagesz, int guardsize, size_t stacksize,
6158
__maps.stack.addr = stackaddr + guardsize;
6259
__maps.stack.size = stacksize - guardsize;
6360
__maps.stack.prot = stackprot;
64-
__maps.stack.hand = -1;
61+
__maps.stack.hand = MAPS_SUBREGION;
62+
__maps.stack.flags = MAP_PRIVATE | MAP_ANONYMOUS;
6563
__maps_adder(&__maps.stack, pagesz);
6664
if (guardsize) {
6765
__maps.guard.addr = stackaddr;
6866
__maps.guard.size = guardsize;
69-
__maps.guard.prot = PROT_NONE;
67+
__maps.guard.prot = PROT_NONE | PROT_GUARD;
7068
__maps.guard.hand = stackhand;
69+
__maps.guard.flags = MAP_PRIVATE | MAP_ANONYMOUS;
7170
__maps_adder(&__maps.guard, pagesz);
71+
} else {
72+
__maps.stack.hand = stackhand;
7273
}
7374
}
7475

@@ -102,28 +103,13 @@ void __maps_init(void) {
102103
}
103104

104105
// record .text and .data mappings
105-
static struct Map text, data;
106-
text.addr = (char *)__executable_start;
107-
text.size = _etext - __executable_start;
108-
text.prot = PROT_READ | PROT_EXEC;
106+
__maps_track((char *)__executable_start, _etext - __executable_start,
107+
PROT_READ | PROT_EXEC, MAP_NOFORK);
109108
uintptr_t ds = ((uintptr_t)_etext + pagesz - 1) & -pagesz;
110-
if (ds < (uintptr_t)_end) {
111-
data.addr = (char *)ds;
112-
data.size = (uintptr_t)_end - ds;
113-
data.prot = PROT_READ | PROT_WRITE;
114-
__maps_adder(&data, pagesz);
115-
}
116-
__maps_adder(&text, pagesz);
117-
}
118-
119-
#if DEBUG_MAPS_LOCK
120-
privileged static void __maps_panic(const char *msg) {
121-
// it's only safe to pass a format string. if we use directives such
122-
// as %s, %t etc. then kprintf() will recursively call __maps_lock()
123-
kprintf(msg);
124-
DebugBreak();
109+
if (ds < (uintptr_t)_end)
110+
__maps_track((char *)ds, (uintptr_t)_end - ds, PROT_READ | PROT_WRITE,
111+
MAP_NOFORK);
125112
}
126-
#endif
127113

128114
bool __maps_held(void) {
129115
return __tls_enabled && !(__get_tls()->tib_flags & TIB_FLAG_VFORKED) &&
@@ -143,7 +129,12 @@ ABI void __maps_lock(void) {
143129
if (tib->tib_flags & TIB_FLAG_VFORKED)
144130
return;
145131
me = atomic_load_explicit(&tib->tib_ptid, memory_order_relaxed);
146-
if (me <= 0)
132+
word = 0;
133+
lock = MUTEX_LOCK(word);
134+
lock = MUTEX_SET_OWNER(lock, me);
135+
if (atomic_compare_exchange_strong_explicit(&__maps.lock.word, &word, lock,
136+
memory_order_acquire,
137+
memory_order_relaxed))
147138
return;
148139
word = atomic_load_explicit(&__maps.lock.word, memory_order_relaxed);
149140
for (;;) {
@@ -154,24 +145,13 @@ ABI void __maps_lock(void) {
154145
return;
155146
continue;
156147
}
157-
#if DEBUG_MAPS_LOCK
158-
if (__deadlock_tracked(&__maps.lock) == 1)
159-
__maps_panic("error: maps lock already held\n");
160-
if (__deadlock_check(&__maps.lock, 1))
161-
__maps_panic("error: maps lock is cyclic\n");
162-
#endif
163148
word = 0;
164149
lock = MUTEX_LOCK(word);
165150
lock = MUTEX_SET_OWNER(lock, me);
166151
if (atomic_compare_exchange_weak_explicit(&__maps.lock.word, &word, lock,
167152
memory_order_acquire,
168-
memory_order_relaxed)) {
169-
#if DEBUG_MAPS_LOCK
170-
__deadlock_track(&__maps.lock, 0);
171-
__deadlock_record(&__maps.lock, 0);
172-
#endif
153+
memory_order_relaxed))
173154
return;
174-
}
175155
for (;;) {
176156
word = atomic_load_explicit(&__maps.lock.word, memory_order_relaxed);
177157
if (MUTEX_OWNER(word) == me)
@@ -183,7 +163,6 @@ ABI void __maps_lock(void) {
183163
}
184164

185165
ABI void __maps_unlock(void) {
186-
int me;
187166
uint64_t word;
188167
struct CosmoTib *tib;
189168
if (!__tls_enabled)
@@ -192,28 +171,16 @@ ABI void __maps_unlock(void) {
192171
return;
193172
if (tib->tib_flags & TIB_FLAG_VFORKED)
194173
return;
195-
me = atomic_load_explicit(&tib->tib_ptid, memory_order_relaxed);
196-
if (me <= 0)
197-
return;
198174
word = atomic_load_explicit(&__maps.lock.word, memory_order_relaxed);
199-
#if DEBUG_MAPS_LOCK
200-
if (__deadlock_tracked(&__maps.lock) == 0)
201-
__maps_panic("error: maps lock not owned by caller\n");
202-
#endif
203175
for (;;) {
204-
if (MUTEX_DEPTH(word)) {
176+
if (MUTEX_DEPTH(word))
205177
if (atomic_compare_exchange_weak_explicit(
206178
&__maps.lock.word, &word, MUTEX_DEC_DEPTH(word),
207179
memory_order_relaxed, memory_order_relaxed))
208180
break;
209-
}
210181
if (atomic_compare_exchange_weak_explicit(&__maps.lock.word, &word, 0,
211182
memory_order_release,
212-
memory_order_relaxed)) {
213-
#if DEBUG_MAPS_LOCK
214-
__deadlock_untrack(&__maps.lock);
215-
#endif
183+
memory_order_relaxed))
216184
break;
217-
}
218185
}
219186
}

libc/intrin/maps.h

Lines changed: 37 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -5,16 +5,37 @@
55
#include "libc/runtime/runtime.h"
66
COSMOPOLITAN_C_START_
77

8+
/* size of dynamic memory that is used internally by your memory manager */
9+
#define MAPS_SIZE 65536
10+
11+
/* when map->hand is MAPS_RESERVATION it means mmap() is transactionally
12+
reserving address space it is in the process of requesting from win32 */
13+
#define MAPS_RESERVATION -2
14+
15+
/* when map->hand is MAPS_SUBREGION it means that an allocation has been
16+
broken into multiple fragments by mprotect(). the first fragment must
17+
be set to MAPS_VIRTUAL or your CreateFileMapping() handle. your frags
18+
must be perfectly contiguous in memory and should have the same flags */
19+
#define MAPS_SUBREGION -3
20+
21+
/* indicates an allocation was created by VirtualAlloc() and so munmap()
22+
must call VirtualFree() when destroying it. use it on the hand field. */
23+
#define MAPS_VIRTUAL -4
24+
25+
/* if this is used on MAP_PRIVATE memory, then it's assumed to be memory
26+
that win32 allocated, e.g. a CreateThread() stack. if this is used on
27+
MAP_FILE memory, then it's assumed to be part of the executable image */
28+
#define MAP_NOFORK 0x10000000
29+
830
#define MAP_TREE_CONTAINER(e) TREE_CONTAINER(struct Map, tree, e)
931

1032
struct Map {
1133
char *addr; /* granule aligned */
1234
size_t size; /* must be nonzero */
1335
int64_t off; /* ignore for anon */
1436
int flags; /* memory map flag */
15-
char prot; /* memory protects */
37+
short prot; /* memory protects */
1638
bool iscow; /* windows nt only */
17-
bool precious; /* windows nt only */
1839
bool readonlyfile; /* windows nt only */
1940
unsigned visited; /* checks and fork */
2041
intptr_t hand; /* windows nt only */
@@ -29,11 +50,17 @@ struct MapLock {
2950
_Atomic(uint64_t) word;
3051
};
3152

53+
struct MapSlab {
54+
struct MapSlab *next;
55+
struct Map maps[(MAPS_SIZE - sizeof(struct MapSlab *)) / sizeof(struct Map)];
56+
};
57+
3258
struct Maps {
3359
uint128_t rand;
3460
struct Tree *maps;
3561
struct MapLock lock;
3662
_Atomic(uintptr_t) freed;
63+
_Atomic(struct MapSlab *) slabs;
3764
size_t count;
3865
size_t pages;
3966
struct Map stack;
@@ -76,33 +103,37 @@ forceinline optimizespeed int __maps_search(const void *key,
76103
return (addr > map->addr) - (addr < map->addr);
77104
}
78105

79-
static inline struct Map *__maps_next(struct Map *map) {
106+
dontinstrument static inline struct Map *__maps_next(struct Map *map) {
80107
struct Tree *node;
81108
if ((node = tree_next(&map->tree)))
82109
return MAP_TREE_CONTAINER(node);
83110
return 0;
84111
}
85112

86-
static inline struct Map *__maps_prev(struct Map *map) {
113+
dontinstrument static inline struct Map *__maps_prev(struct Map *map) {
87114
struct Tree *node;
88115
if ((node = tree_prev(&map->tree)))
89116
return MAP_TREE_CONTAINER(node);
90117
return 0;
91118
}
92119

93-
static inline struct Map *__maps_first(void) {
120+
dontinstrument static inline struct Map *__maps_first(void) {
94121
struct Tree *node;
95122
if ((node = tree_first(__maps.maps)))
96123
return MAP_TREE_CONTAINER(node);
97124
return 0;
98125
}
99126

100-
static inline struct Map *__maps_last(void) {
127+
dontinstrument static inline struct Map *__maps_last(void) {
101128
struct Tree *node;
102129
if ((node = tree_last(__maps.maps)))
103130
return MAP_TREE_CONTAINER(node);
104131
return 0;
105132
}
106133

134+
static inline bool __maps_isalloc(struct Map *map) {
135+
return map->hand != MAPS_SUBREGION;
136+
}
137+
107138
COSMOPOLITAN_C_END_
108139
#endif /* COSMOPOLITAN_MAPS_H_ */

0 commit comments

Comments
 (0)