Skip to content

Commit af3f62a

Browse files
committed
Ensure io requests are always capped at 0x7ffff000
This gives us the Linux behavior across platforms. Fixes #1189
1 parent 6cf9b9e commit af3f62a

File tree

10 files changed

+267
-80
lines changed

10 files changed

+267
-80
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ endif
133133

134134
ifneq ($(findstring aarch64,$(MODE)),)
135135
ARCH = aarch64
136-
HOSTS ?= pi pi5 studio freebsdarm
136+
HOSTS ?= pi studio freebsdarm
137137
else
138138
ARCH = x86_64
139139
HOSTS ?= freebsd rhel7 xnu openbsd netbsd win10

libc/calls/pread.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
#include "libc/macros.internal.h"
3131
#include "libc/runtime/runtime.h"
3232
#include "libc/runtime/zipos.internal.h"
33+
#include "libc/stdio/sysparam.h"
3334
#include "libc/sysv/errfuns.h"
3435

3536
/**
@@ -39,7 +40,7 @@
3940
*
4041
* @param fd is something open()'d earlier, noting pipes might not work
4142
* @param buf is copied into, cf. copy_file_range(), sendfile(), etc.
42-
* @param size in range [1..0x7ffff000] is reasonable
43+
* @param size is always saturated to 0x7ffff000 automatically
4344
* @param offset is bytes from start of file at which read begins
4445
* @return [1..size] bytes on success, 0 on EOF, or -1 w/ errno; with
4546
* exception of size==0, in which case return zero means no error
@@ -58,6 +59,10 @@ ssize_t pread(int fd, void *buf, size_t size, int64_t offset) {
5859
ssize_t rc;
5960
BEGIN_CANCELATION_POINT;
6061

62+
// XNU and BSDs will EINVAL if requested bytes exceeds INT_MAX
63+
// this is inconsistent with Linux which ignores huge requests
64+
size = MIN(size, 0x7ffff000);
65+
6166
if (offset < 0) {
6267
rc = einval();
6368
} else if (fd < 0) {

libc/calls/preadv.c

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,24 +28,55 @@
2828
#include "libc/intrin/likely.h"
2929
#include "libc/intrin/strace.internal.h"
3030
#include "libc/intrin/weaken.h"
31+
#include "libc/limits.h"
32+
#include "libc/mem/alloca.h"
33+
#include "libc/runtime/stack.h"
3134
#include "libc/runtime/zipos.internal.h"
35+
#include "libc/stdckdint.h"
3236
#include "libc/sysv/errfuns.h"
3337

38+
static size_t SumIovecBytes(const struct iovec *iov, int iovlen) {
39+
size_t count = 0;
40+
for (int i = 0; i < iovlen; ++i)
41+
if (ckd_add(&count, count, iov[i].iov_len))
42+
count = SIZE_MAX;
43+
return count;
44+
}
45+
3446
static ssize_t Preadv(int fd, struct iovec *iov, int iovlen, int64_t off) {
3547
int e, i;
3648
size_t got;
3749
ssize_t rc, toto;
3850

39-
if (fd < 0) {
51+
if (fd < 0)
4052
return ebadf();
41-
}
42-
43-
if (iovlen < 0) {
53+
if (iovlen < 0)
4454
return einval();
45-
}
46-
47-
if (IsAsan() && !__asan_is_valid_iov(iov, iovlen)) {
55+
if (IsAsan() && !__asan_is_valid_iov(iov, iovlen))
4856
return efault();
57+
58+
// XNU and BSDs will EINVAL if requested bytes exceeds INT_MAX
59+
// this is inconsistent with Linux which ignores huge requests
60+
if (!IsLinux()) {
61+
size_t sum, remain = 0x7ffff000;
62+
if ((sum = SumIovecBytes(iov, iovlen)) > remain) {
63+
struct iovec *iov2;
64+
#pragma GCC push_options
65+
#pragma GCC diagnostic ignored "-Walloca-larger-than="
66+
iov2 = alloca(iovlen * sizeof(struct iovec));
67+
CheckLargeStackAllocation(iov2, iovlen * sizeof(struct iovec));
68+
#pragma GCC pop_options
69+
for (int i = 0; i < iovlen; ++i) {
70+
iov2[i] = iov[i];
71+
if (remain >= iov2[i].iov_len) {
72+
remain -= iov2[i].iov_len;
73+
} else {
74+
iov2[i].iov_len = remain;
75+
remain = 0;
76+
}
77+
}
78+
iov = iov2;
79+
}
4980
}
5081

5182
if (fd < g_fds.n && g_fds.p[fd].kind == kFdZip) {
@@ -112,6 +143,11 @@ static ssize_t Preadv(int fd, struct iovec *iov, int iovlen, int64_t off) {
112143
/**
113144
* Reads with maximum generality.
114145
*
146+
* It's possible for a file read request to be partially completed. For
147+
* example, if the sum of `iov` lengths exceeds 0x7ffff000 then bytes
148+
* beyond that will be ignored. This is a Linux behavior that Cosmo
149+
* polyfills across platforms.
150+
*
115151
* @return number of bytes actually read, or -1 w/ errno
116152
* @cancelationpoint
117153
* @asyncsignalsafe

libc/calls/pwrite.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "libc/intrin/asan.internal.h"
2929
#include "libc/intrin/strace.internal.h"
3030
#include "libc/macros.internal.h"
31+
#include "libc/stdio/sysparam.h"
3132
#include "libc/sysv/errfuns.h"
3233

3334
/**
@@ -37,7 +38,7 @@
3738
*
3839
* @param fd is something open()'d earlier, noting pipes might not work
3940
* @param buf is copied from, cf. copy_file_range(), sendfile(), etc.
40-
* @param size in range [1..0x7ffff000] is reasonable
41+
* @param size is always saturated to 0x7ffff000 automatically
4142
* @param offset is bytes from start of file at which write begins,
4243
* which can exceed or overlap the end of file, in which case your
4344
* file will be extended
@@ -53,6 +54,10 @@ ssize_t pwrite(int fd, const void *buf, size_t size, int64_t offset) {
5354
size_t wrote;
5455
BEGIN_CANCELATION_POINT;
5556

57+
// XNU and BSDs will EINVAL if requested bytes exceeds INT_MAX
58+
// this is inconsistent with Linux which ignores huge requests
59+
size = MIN(size, 0x7ffff000);
60+
5661
if (offset < 0) {
5762
rc = einval();
5863
} else if (fd == -1) {

libc/calls/pwritev.c

Lines changed: 44 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -28,28 +28,57 @@
2828
#include "libc/intrin/likely.h"
2929
#include "libc/intrin/strace.internal.h"
3030
#include "libc/intrin/weaken.h"
31+
#include "libc/limits.h"
32+
#include "libc/mem/alloca.h"
33+
#include "libc/runtime/stack.h"
34+
#include "libc/stdckdint.h"
3135
#include "libc/sysv/errfuns.h"
3236

37+
static size_t SumIovecBytes(const struct iovec *iov, int iovlen) {
38+
size_t count = 0;
39+
for (int i = 0; i < iovlen; ++i)
40+
if (ckd_add(&count, count, iov[i].iov_len))
41+
count = SIZE_MAX;
42+
return count;
43+
}
44+
3345
static ssize_t Pwritev(int fd, const struct iovec *iov, int iovlen,
3446
int64_t off) {
3547
int i, e;
3648
size_t sent;
3749
ssize_t rc, toto;
3850

39-
if (fd < 0) {
51+
if (fd < 0)
4052
return ebadf();
41-
}
42-
43-
if (iovlen < 0) {
53+
if (iovlen < 0)
4454
return einval();
45-
}
46-
47-
if (IsAsan() && !__asan_is_valid_iov(iov, iovlen)) {
55+
if (IsAsan() && !__asan_is_valid_iov(iov, iovlen))
4856
return efault();
49-
}
50-
51-
if (fd < g_fds.n && g_fds.p[fd].kind == kFdZip) {
57+
if (fd < g_fds.n && g_fds.p[fd].kind == kFdZip)
5258
return ebadf();
59+
60+
// XNU and BSDs will EINVAL if requested bytes exceeds INT_MAX
61+
// this is inconsistent with Linux which ignores huge requests
62+
if (!IsLinux()) {
63+
size_t sum, remain = 0x7ffff000;
64+
if ((sum = SumIovecBytes(iov, iovlen)) > remain) {
65+
struct iovec *iov2;
66+
#pragma GCC push_options
67+
#pragma GCC diagnostic ignored "-Walloca-larger-than="
68+
iov2 = alloca(iovlen * sizeof(struct iovec));
69+
CheckLargeStackAllocation(iov2, iovlen * sizeof(struct iovec));
70+
#pragma GCC pop_options
71+
for (int i = 0; i < iovlen; ++i) {
72+
iov2[i] = iov[i];
73+
if (remain >= iov2[i].iov_len) {
74+
remain -= iov2[i].iov_len;
75+
} else {
76+
iov2[i].iov_len = remain;
77+
remain = 0;
78+
}
79+
}
80+
iov = iov2;
81+
}
5382
}
5483

5584
if (IsWindows()) {
@@ -116,6 +145,11 @@ static ssize_t Pwritev(int fd, const struct iovec *iov, int iovlen,
116145
* been committed. It can also happen if we need to polyfill this system
117146
* call using pwrite().
118147
*
148+
* It's possible for a file write request to be partially completed. For
149+
* example, if the sum of `iov` lengths exceeds 0x7ffff000 then bytes
150+
* beyond that will be ignored. This is a Linux behavior that Cosmo
151+
* polyfills across platforms.
152+
*
119153
* @return number of bytes actually sent, or -1 w/ errno
120154
* @cancelationpoint
121155
* @asyncsignalsafe

libc/calls/read.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "libc/runtime/zipos.internal.h"
3030
#include "libc/sock/internal.h"
3131
#include "libc/sock/sock.h"
32+
#include "libc/stdio/sysparam.h"
3233
#include "libc/sysv/errfuns.h"
3334

3435
/**
@@ -41,7 +42,7 @@
4142
*
4243
* @param fd is something open()'d earlier
4344
* @param buf is copied into, cf. copy_file_range(), sendfile(), etc.
44-
* @param size in range [1..0x7ffff000] is reasonable
45+
* @param size is always saturated to 0x7ffff000 automatically
4546
* @return [1..size] bytes on success, 0 on EOF, or -1 w/ errno; with
4647
* exception of size==0, in which case return zero means no error
4748
* @raise EBADF if `fd` is negative or not an open file descriptor
@@ -67,6 +68,10 @@ ssize_t read(int fd, void *buf, size_t size) {
6768
ssize_t rc;
6869
BEGIN_CANCELATION_POINT;
6970

71+
// XNU and BSDs will EINVAL if requested bytes exceeds INT_MAX
72+
// this is inconsistent with Linux which ignores huge requests
73+
size = MIN(size, 0x7ffff000);
74+
7075
if (fd < 0) {
7176
rc = ebadf();
7277
} else if ((!buf && size) || (IsAsan() && !__asan_is_valid(buf, size))) {

libc/calls/readv.c

Lines changed: 70 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,74 @@
2828
#include "libc/intrin/likely.h"
2929
#include "libc/intrin/strace.internal.h"
3030
#include "libc/intrin/weaken.h"
31+
#include "libc/limits.h"
32+
#include "libc/mem/alloca.h"
33+
#include "libc/runtime/stack.h"
3134
#include "libc/runtime/zipos.internal.h"
3235
#include "libc/sock/internal.h"
36+
#include "libc/stdckdint.h"
3337
#include "libc/sysv/errfuns.h"
3438

39+
static size_t SumIovecBytes(const struct iovec *iov, int iovlen) {
40+
size_t count = 0;
41+
for (int i = 0; i < iovlen; ++i)
42+
if (ckd_add(&count, count, iov[i].iov_len))
43+
count = SIZE_MAX;
44+
return count;
45+
}
46+
47+
static ssize_t readv_impl(int fd, const struct iovec *iov, int iovlen) {
48+
if (fd < 0)
49+
return ebadf();
50+
if (iovlen < 0)
51+
return einval();
52+
if (IsAsan() && !__asan_is_valid_iov(iov, iovlen))
53+
return efault();
54+
55+
// XNU and BSDs will EINVAL if requested bytes exceeds INT_MAX
56+
// this is inconsistent with Linux which ignores huge requests
57+
if (!IsLinux()) {
58+
size_t sum, remain = 0x7ffff000;
59+
if ((sum = SumIovecBytes(iov, iovlen)) > remain) {
60+
struct iovec *iov2;
61+
#pragma GCC push_options
62+
#pragma GCC diagnostic ignored "-Walloca-larger-than="
63+
iov2 = alloca(iovlen * sizeof(struct iovec));
64+
CheckLargeStackAllocation(iov2, iovlen * sizeof(struct iovec));
65+
#pragma GCC pop_options
66+
for (int i = 0; i < iovlen; ++i) {
67+
iov2[i] = iov[i];
68+
if (remain >= iov2[i].iov_len) {
69+
remain -= iov2[i].iov_len;
70+
} else {
71+
iov2[i].iov_len = remain;
72+
remain = 0;
73+
}
74+
}
75+
iov = iov2;
76+
}
77+
}
78+
79+
if (fd < g_fds.n && g_fds.p[fd].kind == kFdZip) {
80+
return _weaken(__zipos_read)(
81+
(struct ZiposHandle *)(intptr_t)g_fds.p[fd].handle, iov, iovlen, -1);
82+
} else if (IsLinux() || IsXnu() || IsFreebsd() || IsOpenbsd() || IsNetbsd()) {
83+
if (iovlen == 1) {
84+
return sys_read(fd, iov[0].iov_base, iov[0].iov_len);
85+
} else {
86+
return sys_readv(fd, iov, iovlen);
87+
}
88+
} else if (fd >= g_fds.n) {
89+
return ebadf();
90+
} else if (IsMetal()) {
91+
return sys_readv_metal(fd, iov, iovlen);
92+
} else if (IsWindows()) {
93+
return sys_readv_nt(fd, iov, iovlen);
94+
} else {
95+
return enosys();
96+
}
97+
}
98+
3599
/**
36100
* Reads data to multiple buffers.
37101
*
@@ -42,39 +106,19 @@
42106
* be passed to the kernel as read() instead. This yields a 100 cycle
43107
* performance boost in the case of a single small iovec.
44108
*
109+
* It's possible for a file read request to be partially completed. For
110+
* example, if the sum of `iov` lengths exceeds 0x7ffff000 then bytes
111+
* beyond that will be ignored. This is a Linux behavior that Cosmo
112+
* polyfills across platforms.
113+
*
45114
* @return number of bytes actually read, or -1 w/ errno
46115
* @cancelationpoint
47116
* @restartable
48117
*/
49118
ssize_t readv(int fd, const struct iovec *iov, int iovlen) {
50119
ssize_t rc;
51120
BEGIN_CANCELATION_POINT;
52-
53-
if (fd < 0) {
54-
rc = ebadf();
55-
} else if (iovlen < 0) {
56-
rc = einval();
57-
} else if (IsAsan() && !__asan_is_valid_iov(iov, iovlen)) {
58-
rc = efault();
59-
} else if (fd < g_fds.n && g_fds.p[fd].kind == kFdZip) {
60-
rc = _weaken(__zipos_read)(
61-
(struct ZiposHandle *)(intptr_t)g_fds.p[fd].handle, iov, iovlen, -1);
62-
} else if (IsLinux() || IsXnu() || IsFreebsd() || IsOpenbsd() || IsNetbsd()) {
63-
if (iovlen == 1) {
64-
rc = sys_read(fd, iov[0].iov_base, iov[0].iov_len);
65-
} else {
66-
rc = sys_readv(fd, iov, iovlen);
67-
}
68-
} else if (fd >= g_fds.n) {
69-
rc = ebadf();
70-
} else if (IsMetal()) {
71-
rc = sys_readv_metal(fd, iov, iovlen);
72-
} else if (IsWindows()) {
73-
rc = sys_readv_nt(fd, iov, iovlen);
74-
} else {
75-
rc = enosys();
76-
}
77-
121+
rc = readv_impl(fd, iov, iovlen);
78122
END_CANCELATION_POINT;
79123
STRACE("readv(%d, [%s], %d) → %'ld% m", fd, DescribeIovec(rc, iov, iovlen),
80124
iovlen, rc);

libc/calls/write.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "libc/intrin/weaken.h"
2828
#include "libc/runtime/zipos.internal.h"
2929
#include "libc/sock/sock.h"
30+
#include "libc/stdio/sysparam.h"
3031
#include "libc/sysv/errfuns.h"
3132

3233
/**
@@ -39,6 +40,7 @@
3940
*
4041
* @param fd is open file descriptor
4142
* @param buf is copied from, cf. copy_file_range(), sendfile(), etc.
43+
* @param size is always saturated to 0x7ffff000 automatically
4244
* @return [1..size] bytes on success, or -1 w/ errno; noting zero is
4345
* impossible unless size was passed as zero to do an error check
4446
* @raise EBADF if `fd` is negative or not an open file descriptor
@@ -68,6 +70,10 @@ ssize_t write(int fd, const void *buf, size_t size) {
6870
ssize_t rc;
6971
BEGIN_CANCELATION_POINT;
7072

73+
// XNU and BSDs will EINVAL if requested bytes exceeds INT_MAX
74+
// this is inconsistent with Linux which ignores huge requests
75+
size = MIN(size, 0x7ffff000);
76+
7177
if (fd < 0) {
7278
rc = ebadf();
7379
} else if (IsAsan() && !__asan_is_valid(buf, size)) {

0 commit comments

Comments
 (0)