Skip to content

Commit f3e28aa

Browse files
committed
Make SSL handshakes much faster
This change boosts SSL handshake performance from 2,627 to ~10,000 per second which is the same level of performance as NGINX at establishing secure connections. That's impressive if we consider that redbean is a forking frontend application server. This was accomplished by: 1. Enabling either SSL session caching or SSL tickets. We chose to use tickets since they reduce network round trips too and that's a more important metric than wrk'ing localhost. 2. Fixing mbedtls_mpi_sub_abs() which is the most frequently called function. It's called about 12,000 times during an SSL handshake since it's the basis of most arithmetic operations like addition and for some strange reason it was designed to make two needless copies in addition to calling malloc and free. That's now fixed. 3. Improving TLS output buffering during the SSL handshake only, so that only a single write and read system call is needed until blocking on the ping pong. redbean will now do a better job wiping sensitive memory from a child process as soon as it's not needed. The nice thing about fork is it's much faster than reverse proxying so the goal is to use the different address spaces along with setuid() to minimize the risk that a server key will be compromised in the event that application code is hacked.
1 parent 8c4cce0 commit f3e28aa

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

103 files changed

+1309
-1084
lines changed

libc/calls/sigenter-xnu.c

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "libc/calls/internal.h"
2121
#include "libc/calls/struct/siginfo.h"
2222
#include "libc/calls/ucontext.h"
23+
#include "libc/intrin/repstosb.h"
2324
#include "libc/str/str.h"
2425

2526
/**
@@ -401,7 +402,14 @@ noasan static void xnuthreadstate2linux(
401402
mc->fs = xnuss->__fs;
402403
mc->eflags = xnuss->__rflags;
403404
uc->uc_flags = xnuss->__rflags;
404-
memcpy(&mc->r8, &xnuss->__r8, 8 * sizeof(int64_t));
405+
mc->r8 = xnuss->__r8;
406+
mc->r9 = xnuss->__r9;
407+
mc->r10 = xnuss->__r10;
408+
mc->r11 = xnuss->__r11;
409+
mc->r12 = xnuss->__r12;
410+
mc->r13 = xnuss->__r13;
411+
mc->r14 = xnuss->__r14;
412+
mc->r15 = xnuss->__r15;
405413
}
406414

407415
noasan static void linuxthreadstate2xnu(
@@ -420,7 +428,21 @@ noasan static void linuxthreadstate2xnu(
420428
xnuss->__fs = mc->fs;
421429
xnuss->__rflags = mc->eflags;
422430
xnuss->__rflags = uc->uc_flags;
423-
memcpy(&xnuss->__r8, &mc->r8, 8 * sizeof(int64_t));
431+
xnuss->__r8 = mc->r8;
432+
xnuss->__r9 = mc->r9;
433+
xnuss->__r10 = mc->r10;
434+
xnuss->__r11 = mc->r11;
435+
xnuss->__r12 = mc->r12;
436+
xnuss->__r13 = mc->r13;
437+
xnuss->__r14 = mc->r14;
438+
xnuss->__r15 = mc->r15;
439+
}
440+
441+
/**
 * Copies (8 + 16) * 16 = 384 bytes from `s` to `d`, 16 bytes at a time.
 *
 * The two callers in this file pass `fs->st` and `&xnufs->__fpu_stmm0`
 * (in either direction); the memcpy() calls this helper replaces were
 * commented as copying st0-st7 as well as xmm0-xmm15, i.e. 8 + 16
 * slots of 16 bytes each.
 *
 * NOTE(review): presumably the fixed-size __builtin_memcpy() is used so
 * the copy is expanded inline instead of calling the library memcpy()
 * from this noasan signal-entry path -- confirm against the build flags.
 *
 * @param d is the destination; must have room for 384 bytes
 * @param s is the source; must provide 384 readable bytes
 */
noasan static void CopyFpXmmRegs(void *d, const void *s) {
442+
size_t i;
443+
for (i = 0; i < (8 + 16) * 16; i += 16) {
444+
__builtin_memcpy((char *)d + i, (const char *)s + i, 16);
445+
}
424446
}
425447

426448
noasan static void xnussefpustate2linux(
@@ -433,8 +455,7 @@ noasan static void xnussefpustate2linux(
433455
fs->rdp = xnufs->__fpu_dp;
434456
fs->mxcsr = xnufs->__fpu_mxcsr;
435457
fs->mxcr_mask = xnufs->__fpu_mxcsrmask;
436-
/* copy st0-st7 as well as xmm0-xmm15 */
437-
memcpy(fs->st, &xnufs->__fpu_stmm0, (8 + 16) * sizeof(uint128_t));
458+
CopyFpXmmRegs(fs->st, &xnufs->__fpu_stmm0);
438459
}
439460

440461
noasan static void linuxssefpustate2xnu(
@@ -447,8 +468,7 @@ noasan static void linuxssefpustate2xnu(
447468
xnufs->__fpu_dp = fs->rdp;
448469
xnufs->__fpu_mxcsr = fs->mxcsr;
449470
xnufs->__fpu_mxcsrmask = fs->mxcr_mask;
450-
/* copy st0-st7 as well as xmm0-xmm15 */
451-
memcpy(&xnufs->__fpu_stmm0, fs->st, (8 + 16) * sizeof(uint128_t));
471+
CopyFpXmmRegs(&xnufs->__fpu_stmm0, fs->st);
452472
}
453473

454474
noasan void __sigenter_xnu(void *fn, int infostyle, int sig,
@@ -462,10 +482,9 @@ noasan void __sigenter_xnu(void *fn, int infostyle, int sig,
462482
} g;
463483
rva = __sighandrvas[sig & (NSIG - 1)];
464484
if (rva >= kSigactionMinRva) {
465-
memset(&g, 0, sizeof(g));
485+
repstosb(&g, 0, sizeof(g));
466486
if (xnuctx) {
467-
memcpy(&g.uc.uc_sigmask, &xnuctx->uc_sigmask,
468-
MIN(sizeof(g.uc.uc_sigmask), sizeof(xnuctx->uc_sigmask)));
487+
g.uc.uc_sigmask.__bits[0] = xnuctx->uc_sigmask;
469488
g.uc.uc_stack.ss_sp = xnuctx->uc_stack.ss_sp;
470489
g.uc.uc_stack.ss_flags = xnuctx->uc_stack.ss_flags;
471490
g.uc.uc_stack.ss_size = xnuctx->uc_stack.ss_size;

libc/intrin/mpsadbw.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,5 +40,5 @@ void(mpsadbw)(uint16_t c[8], const uint8_t b[16], const uint8_t a[16],
4040
r[i] += ABS(b[(control & 4) + i + j] - a[(control & 3) * 4 + j]);
4141
}
4242
}
43-
memcpy(c, r, 16);
43+
__builtin_memcpy(c, r, 16);
4444
}

libc/intrin/pabsb.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,5 +30,5 @@ void(pabsb)(uint8_t a[16], const int8_t b[16]) {
3030
for (i = 0; i < 16; ++i) {
3131
r[i] = ABS(b[i]);
3232
}
33-
memcpy(a, r, 16);
33+
__builtin_memcpy(a, r, 16);
3434
}

libc/intrin/pabsd.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,5 +30,5 @@ void(pabsd)(uint32_t a[4], const int32_t b[4]) {
3030
for (i = 0; i < 4; ++i) {
3131
r[i] = b[i] >= 0 ? b[i] : -(uint32_t)b[i];
3232
}
33-
memcpy(a, r, 16);
33+
__builtin_memcpy(a, r, 16);
3434
}

libc/intrin/pabsw.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,5 +30,5 @@ void(pabsw)(uint16_t a[8], const int16_t b[8]) {
3030
for (i = 0; i < 8; ++i) {
3131
r[i] = ABS(b[i]);
3232
}
33-
memcpy(a, r, 16);
33+
__builtin_memcpy(a, r, 16);
3434
}

libc/intrin/packsswb.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,5 +34,5 @@ void(packsswb)(int8_t a[16], const int16_t b[8], const int16_t c[8]) {
3434
int8_t r[16];
3535
for (i = 0; i < 8; ++i) r[i + 0] = MIN(INT8_MAX, MAX(INT8_MIN, b[i]));
3636
for (i = 0; i < 8; ++i) r[i + 8] = MIN(INT8_MAX, MAX(INT8_MIN, c[i]));
37-
memcpy(a, r, 16);
37+
__builtin_memcpy(a, r, 16);
3838
}

libc/intrin/packusdw.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,5 +30,5 @@ void(packusdw)(uint16_t a[8], const int32_t b[4], const int32_t c[4]) {
3030
uint16_t r[8];
3131
for (i = 0; i < 4; ++i) r[i + 0] = MIN(UINT16_MAX, MAX(UINT16_MIN, b[i]));
3232
for (i = 0; i < 4; ++i) r[i + 4] = MIN(UINT16_MAX, MAX(UINT16_MIN, c[i]));
33-
memcpy(a, r, 16);
33+
__builtin_memcpy(a, r, 16);
3434
}

libc/intrin/packuswb.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,5 +34,5 @@ void(packuswb)(uint8_t a[16], const int16_t b[8], const int16_t c[8]) {
3434
uint8_t r[16];
3535
for (i = 0; i < 8; ++i) r[i + 0] = MIN(UINT8_MAX, MAX(UINT8_MIN, b[i]));
3636
for (i = 0; i < 8; ++i) r[i + 8] = MIN(UINT8_MAX, MAX(UINT8_MIN, c[i]));
37-
memcpy(a, r, 16);
37+
__builtin_memcpy(a, r, 16);
3838
}

libc/intrin/paddb.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,5 +31,5 @@ void(paddb)(int8_t a[16], const int8_t b[16], const int8_t c[16]) {
3131
unsigned i;
3232
int8_t r[16];
3333
for (i = 0; i < 16; ++i) r[i] = b[i] + c[i];
34-
memcpy(a, r, 16);
34+
__builtin_memcpy(a, r, 16);
3535
}

libc/intrin/paddd.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,5 +33,5 @@ void(paddd)(uint32_t a[4], const uint32_t b[4], const uint32_t c[4]) {
3333
for (i = 0; i < 4; ++i) {
3434
r[i] = b[i] + c[i];
3535
}
36-
memcpy(a, r, 16);
36+
__builtin_memcpy(a, r, 16);
3737
}

0 commit comments

Comments
 (0)