Skip to content

Commit a1677d6

Browse files
committed
Transcode ISO-8859-1 in HTTP headers
If we keep making changes like this, redbean might not be a toy anymore. Additional steps are also being taken now to prevent ANSI control codes sent by the client from slipping into logs.
1 parent dcbd2b8 commit a1677d6

14 files changed

+675
-161
lines changed

net/http/decodelatin1.c

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
2+
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
3+
╞══════════════════════════════════════════════════════════════════════════════╡
4+
│ Copyright 2021 Justine Alexandra Roberts Tunney │
5+
│ │
6+
│ Permission to use, copy, modify, and/or distribute this software for │
7+
│ any purpose with or without fee is hereby granted, provided that the │
8+
│ above copyright notice and this permission notice appear in all copies. │
9+
│ │
10+
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
11+
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
12+
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
13+
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
14+
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
15+
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
16+
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
17+
│ PERFORMANCE OF THIS SOFTWARE. │
18+
╚─────────────────────────────────────────────────────────────────────────────*/
19+
#include "libc/errno.h"
#include "libc/mem/mem.h"
#include "libc/str/str.h"
#include "net/http/http.h"
22+
23+
/**
 * Decodes ISO-8859-1 to UTF-8.
 *
 * Bytes below 0x80 are copied through unchanged. Every other byte is
 * expanded into the two-byte UTF-8 sequence for the code point having
 * the same value, so the output is at most twice as long as the input.
 *
 * The returned buffer is obtained from malloc() and is owned by the
 * caller. A NULL `data` is accepted when `size` is zero.
 *
 * @param data is input value
 * @param size if -1 implies strlen
 * @param out_size if non-NULL receives output length on success, not
 *     counting the NUL terminator
 * @return allocated NUL-terminated buffer, or NULL w/ errno
 */
char *DecodeLatin1(const char *data, size_t size, size_t *out_size) {
  int c;
  size_t i;
  char *r, *q, *fit;
  if (size == (size_t)-1) size = strlen(data);
  /* the worst case needs size * 2 + 1 bytes; refuse sizes for which
     that would wrap around and yield an undersized allocation */
  if (size > ((size_t)-1 - 1) / 2) {
    errno = ENOMEM;
    return NULL;
  }
  if (!(r = malloc(size * 2 + 1))) return NULL;
  q = r;
  /* index rather than walk a pointer, so that no arithmetic happens
     on data when it's NULL and size is zero */
  for (i = 0; i < size; ++i) {
    c = data[i] & 0xff;
    if (c < 0200) {
      *q++ = c;
    } else {
      *q++ = 0300 | c >> 6;
      *q++ = 0200 | (c & 077);
    }
  }
  if (out_size) *out_size = q - r;
  *q++ = '\0';
  /* give back the unused tail; if shrinking fails the original
     buffer is still valid, so just return that instead */
  if ((fit = realloc(r, q - r))) r = fit;
  return r;
}

net/http/encodehttpheadervalue.c

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
2+
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
3+
╞══════════════════════════════════════════════════════════════════════════════╡
4+
│ Copyright 2021 Justine Alexandra Roberts Tunney │
5+
│ │
6+
│ Permission to use, copy, modify, and/or distribute this software for │
7+
│ any purpose with or without fee is hereby granted, provided that the │
8+
│ above copyright notice and this permission notice appear in all copies. │
9+
│ │
10+
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
11+
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
12+
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
13+
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
14+
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
15+
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
16+
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
17+
│ PERFORMANCE OF THIS SOFTWARE. │
18+
╚─────────────────────────────────────────────────────────────────────────────*/
19+
#include "libc/errno.h"
20+
#include "libc/mem/mem.h"
21+
#include "libc/str/str.h"
22+
#include "libc/str/thompike.h"
23+
#include "net/http/http.h"
24+
25+
/**
 * Encodes HTTP header value.
 *
 * The value is prepared for the wire as follows:
 *
 * 1. Leading and trailing spaces and tabs are removed.
 * 2. UTF-8 is turned into ISO-8859-1.
 * 3. The value is rejected with EILSEQ if it contains a C0 or C1
 *    control code other than tab, DEL, or anything above 0xFF.
 *
 * A byte in the lead-byte range that isn't followed by a continuation
 * byte is taken at face value, i.e. the input is assumed to already
 * be latin1.
 *
 * @param data is input value
 * @param size if -1 implies strlen
 * @param out_size if non-NULL receives output length on success
 * @return allocated NUL-terminated string, or NULL w/ errno
 */
char *EncodeHttpHeaderValue(const char *data, size_t size, size_t *out_size) {
  wint_t ch;
  bool started;
  char *buf, *dst, *fit;
  const char *src, *end;
  if (size == -1) size = strlen(data);
  if (!(buf = malloc(size + 1))) return NULL;
  started = 0;
  dst = buf;
  src = data;
  end = src + size;
  while (src < end) {
    ch = *src++ & 0xff;
    if (ch >= 0300 && src < end && ThomPikeCont(*src)) {
      if (ThomPikeLen(ch) == 2) {
        ch = ThomPikeMerge(ThomPikeByte(ch), *src++);
      } else {
        ch = 0; /* not a two-byte sequence; zero gets rejected below */
      }
    }
    if (!started) {
      if (ch == ' ' || ch == '\t') continue; /* still in leading blanks */
      started = true;
    }
    if (ch != '\t' && (ch < 0x20 || ch > 0xFF || (0x7E < ch && ch < 0xA0))) {
      free(buf);
      errno = EILSEQ;
      return NULL;
    }
    *dst++ = ch;
  }
  /* back up over trailing blanks */
  while (dst > buf && (dst[-1] == ' ' || dst[-1] == '\t')) --dst;
  if (out_size) *out_size = dst - buf;
  *dst++ = '\0';
  /* shrink to fit; the original buffer stays valid if this fails */
  if ((fit = realloc(buf, dst - buf))) buf = fit;
  return buf;
}

net/http/http.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,10 @@ unsigned ParseHttpVersion(const char *, size_t);
121121
int64_t ParseHttpDateTime(const char *, size_t);
122122
const char *GetHttpReason(int);
123123
const char *GetHttpHeaderName(int);
124+
char *DecodeLatin1(const char *, size_t, size_t *);
125+
bool IsValidHttpToken(const char *, size_t);
126+
char *EncodeHttpHeaderValue(const char *, size_t, size_t *);
127+
char *VisualizeControlCodes(const char *, size_t, size_t *);
124128

125129
COSMOPOLITAN_C_END_
126130
#endif /* !(__ASSEMBLER__ + __LINKER__ + 0) */

net/http/isvalidhttptoken.c

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
2+
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
3+
╞══════════════════════════════════════════════════════════════════════════════╡
4+
│ Copyright 2021 Justine Alexandra Roberts Tunney │
5+
│ │
6+
│ Permission to use, copy, modify, and/or distribute this software for │
7+
│ any purpose with or without fee is hereby granted, provided that the │
8+
│ above copyright notice and this permission notice appear in all copies. │
9+
│ │
10+
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
11+
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
12+
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
13+
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
14+
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
15+
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
16+
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
17+
│ PERFORMANCE OF THIS SOFTWARE. │
18+
╚─────────────────────────────────────────────────────────────────────────────*/
19+
#include "libc/str/str.h"
20+
#include "net/http/http.h"
21+
22+
// http/1.1 token dispatch
// 0 is CTLs, SP, ()<>@,;:\"/[]?={}
// 1 is legal ascii
static const char kHttpToken[256] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x00
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x10
    0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0,  // 0x20
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,  // 0x30
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x40
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1,  // 0x50
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0x60
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0,  // 0x70
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x80
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x90
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xa0
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xb0
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xc0
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xd0
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xe0
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xf0
};

/**
 * Checks that string is a well-formed HTTP token.
 *
 * A token is one or more characters, none of which may be a control
 * code, space, separator, or byte outside the ASCII range. The empty
 * string is never a token, no matter how its length was conveyed.
 *
 * @param s is the candidate string
 * @param n is byte length of s, or -1 which implies strlen
 * @return true if s is a non-empty run of legal token characters
 */
bool IsValidHttpToken(const char *s, size_t n) {
  size_t i;
  /* resolve the sentinel first so that "" is rejected either way */
  if (n == (size_t)-1) n = strlen(s);
  if (!n) return false;
  for (i = 0; i < n; ++i) {
    /* mask so a negative char can't index before the table */
    if (!kHttpToken[s[i] & 0xff]) {
      return false;
    }
  }
  return true;
}

net/http/parsehttprequest.c

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,19 @@ void DestroyHttpRequest(struct HttpRequest *r) {
4747

4848
/**
4949
* Parses HTTP request.
50+
*
51+
* This parser is responsible for determining the length of a message
52+
* and slicing the strings inside it. Performance is attained using
53+
* perfect hash tables. No memory allocation is performed for normal
54+
* messages. Line folding is forbidden. State persists across calls so
55+
* that fragmented messages can be handled efficiently. A limitation on
56+
* message size is imposed to make the header data structures smaller.
57+
* All other things are permissive to the greatest extent possible.
58+
* Further functions are provided for the interpretation, validation,
59+
* and sanitization of various fields.
5060
*/
5161
int ParseHttpRequest(struct HttpRequest *r, const char *p, size_t n) {
52-
int c, h;
62+
int c, h, i;
5363
struct HttpRequestHeader *x;
5464
for (n = MIN(n, LIMIT); r->i < n; ++r->i) {
5565
c = p[r->i] & 0xff;
@@ -122,14 +132,16 @@ int ParseHttpRequest(struct HttpRequest *r, const char *p, size_t n) {
122132
/* fallthrough */
123133
case HVAL:
124134
if (c == '\r' || c == '\n') {
135+
i = r->i;
136+
while (i > r->a && (p[i - 1] == ' ' || p[i - 1] == '\t')) --i;
125137
if ((h = GetHttpHeader(p + r->k.a, r->k.b - r->k.a)) != -1) {
126138
r->headers[h].a = r->a;
127-
r->headers[h].b = r->i;
139+
r->headers[h].b = i;
128140
} else if ((x = realloc(r->xheaders.p, (r->xheaders.n + 1) *
129141
sizeof(*r->xheaders.p)))) {
130142
x[r->xheaders.n].k = r->k;
131143
x[r->xheaders.n].v.a = r->a;
132-
x[r->xheaders.n].v.b = r->i;
144+
x[r->xheaders.n].v.b = i;
133145
r->xheaders.p = x;
134146
++r->xheaders.n;
135147
}

net/http/visualizecontrolcodes.c

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
2+
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
3+
╞══════════════════════════════════════════════════════════════════════════════╡
4+
│ Copyright 2021 Justine Alexandra Roberts Tunney │
5+
│ │
6+
│ Permission to use, copy, modify, and/or distribute this software for │
7+
│ any purpose with or without fee is hereby granted, provided that the │
8+
│ above copyright notice and this permission notice appear in all copies. │
9+
│ │
10+
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
11+
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
12+
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
13+
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
14+
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
15+
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
16+
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
17+
│ PERFORMANCE OF THIS SOFTWARE. │
18+
╚─────────────────────────────────────────────────────────────────────────────*/
19+
#include "libc/mem/mem.h"
20+
#include "libc/str/str.h"
21+
#include "libc/str/thompike.h"
22+
#include "libc/str/tpenc.h"
23+
#include "net/http/http.h"
24+
25+
/**
 * Filters out control codes from string.
 *
 * This is useful for logging data like HTTP messages, where we don't
 * want full blown C string literal escaping, but we don't want things
 * like raw ANSI control codes from untrusted devices in our terminals.
 *
 * The translation works as follows:
 *
 * - C0 codes other than \t, \r, and \n are replaced by the code point
 *   at U+2400 plus the code (Control Pictures)
 * - DEL is replaced by U+2421
 * - values 0x80 through 0x9F are written as a six character \u00XX
 *   escape
 * - anything else is re-encoded with tpenc()
 *
 * @param data is input value
 * @param size if -1 implies strlen
 * @param out_size if non-NULL receives output length on success
 * @return allocated NUL-terminated buffer, or NULL w/ errno
 */
char *VisualizeControlCodes(const char *data, size_t size, size_t *out_size) {
  uint64_t word;
  unsigned got, want;
  wint_t c, acc, cont;
  char *buf, *dst, *fit;
  const char *src, *end;
  const char *hex = "0123456789abcdef";
  if (size == -1) size = strlen(data);
  /* the longest thing emitted for one input byte is a \u00XX escape */
  if (!(buf = malloc(size * 6 + 1))) return NULL;
  dst = buf;
  src = data;
  end = src + size;
  while (src < end) {
    c = *src++ & 0xff;
    if (c >= 0300) {
      /* try to consume a whole multibyte sequence; when it's cut
         short or malformed, c keeps the value of the lone byte and
         nothing further is consumed */
      acc = ThomPikeByte(c);
      want = ThomPikeLen(c) - 1;
      if (src + want <= end) {
        for (got = 0;;) {
          cont = src[got] & 0xff;
          if (ThomPikeCont(cont)) {
            acc = ThomPikeMerge(acc, cont);
            if (++got != want) continue;
            c = acc;
            src += got;
          }
          break;
        }
      }
    }
    if (0x80 <= c && c < 0xA0) {
      *dst++ = '\\';
      *dst++ = 'u';
      *dst++ = '0';
      *dst++ = '0';
      *dst++ = hex[(c & 0xF0) >> 4];
      *dst++ = hex[(c & 0x0F) >> 0];
    } else {
      if (c == 0x7F) {
        c = 0x2421;
      } else if (0x00 <= c && c < 0x20) {
        if (!(c == '\t' || c == '\r' || c == '\n')) {
          c += 0x2400; /* Control Pictures */
        }
      }
      /* NOTE(review): presumably tpenc() packs the UTF-8 bytes into a
         word with the first byte lowest; this loop emits the low byte
         first and stops once the remainder is zero */
      word = tpenc(c);
      do {
        *dst++ = word;
        word >>= 8;
      } while (word);
    }
  }
  if (out_size) *out_size = dst - buf;
  *dst++ = '\0';
  /* shrink to fit; the original buffer stays valid if this fails */
  if ((fit = realloc(buf, dst - buf))) buf = fit;
  return buf;
}

test/net/http/decodelatin1_test.c

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
2+
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
3+
╞══════════════════════════════════════════════════════════════════════════════╡
4+
│ Copyright 2021 Justine Alexandra Roberts Tunney │
5+
│ │
6+
│ Permission to use, copy, modify, and/or distribute this software for │
7+
│ any purpose with or without fee is hereby granted, provided that the │
8+
│ above copyright notice and this permission notice appear in all copies. │
9+
│ │
10+
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
11+
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
12+
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
13+
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
14+
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
15+
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
16+
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
17+
│ PERFORMANCE OF THIS SOFTWARE. │
18+
╚─────────────────────────────────────────────────────────────────────────────*/
19+
#include "libc/testlib/testlib.h"
20+
#include "net/http/http.h"
21+
22+
size_t n;
23+
24+
TEST(DecodeLatin1, test) {
25+
EXPECT_STREQ("", DecodeLatin1(NULL, 0, 0));
26+
EXPECT_STREQ("¥atta", DecodeLatin1("\245atta", -1, &n));
27+
EXPECT_EQ(6, n);
28+
}

0 commit comments

Comments
 (0)