Skip to content

Commit f7ff515

Browse files
authored
*scanf() fixes to make TeX work (#1109)
* Fix reading the same symbol twice when using `{f,}scanf()`

  PR #924 appears to use `unget()` subtly incorrectly when parsing floating-point numbers. The rest of the code only uses `unget()` immediately followed by `goto Done;`, to push back a symbol that can't possibly belong to the directive we're processing. With floating-point, however, the ungot characters could very well be valid for the *next* directive, so we will essentially read them twice. This can't be seen in `sscanf()` tests because `unget()` is a no-op there, but the test I added for `fscanf()` fails like this:

      ...
      EXPECT_EQ(0xDEAD, i1)
        need 57005 (or 0xdead) =
         got 908973 (or 0x000ddead)
      ...
      EXPECT_EQ(0xBEEF, i2)
        need 48879 (or 0xbeef) =
         got 769775 (or 0x000bbeef)

  This means we read 0xDDEAD instead of 0xDEAD and 0xBBEEF instead of 0xBEEF. I checked that both musl and glibc read 0xDEAD/0xBEEF, as expected. Fix the failing test by removing the unneeded `unget()` calls.

* Don't read invalid floating-point numbers in `*scanf()`

  Currently, we just ignore any errors from `strtod()`. They can happen either because no valid float can be parsed at all, or because the state machine recognizes only a prefix of a valid floating-point number. Fix this by making sure `strtod()` parses everything we recognized, provided it's non-empty. This requires popping the last character off the FP buffer, because that character is supposed to be parsed by the next `*scanf()` directive.

* Make `%c` parsing in `*scanf()` respect the C standard

  Currently, `%c`-style directives always succeed even if there are actually fewer characters in the input than requested. Before the fix, the added test fails like this:

      ...
      EXPECT_EQ(2, sscanf("ab", "%c %c %c", &c2, &c3, &c4))
        need 2 (or 0x02 or '\2' or ENOENT) =
         got 3 (or 0x03 or '\3' or ESRCH)
      ...
      EXPECT_EQ(0, sscanf("abcd", "%5c", s2))
        need 0 (or 0x0 or '\0') =
         got 1 (or 0x01 or '\1' or EPERM)

  musl and glibc pass this test.
1 parent 3afe3a3 commit f7ff515

File tree

3 files changed

+89
-22
lines changed

3 files changed

+89
-22
lines changed

libc/stdio/vcscanf.c

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,12 @@
5050
} \
5151
c; \
5252
})
53+
#define UNBUFFER \
54+
({ \
55+
if (c != -1) { \
56+
fpbuf[--fpbufcur] = '\0'; \
57+
} \
58+
})
5359

5460
/**
5561
* String / file / stream decoder.
@@ -369,10 +375,11 @@ int __vcscanf(int callback(void *), //
369375
}
370376
} while ((c = BUFFER) != -1 && c != ')');
371377
if (c == ')') {
372-
c = BUFFER;
378+
c = READ;
373379
}
374380
goto GotFloatingPointNumber;
375381
} else {
382+
UNBUFFER;
376383
goto GotFloatingPointNumber;
377384
}
378385
} else {
@@ -410,9 +417,7 @@ int __vcscanf(int callback(void *), //
410417
goto Done;
411418
}
412419
} else {
413-
if (c != -1 && unget) {
414-
unget(c, arg);
415-
}
420+
UNBUFFER;
416421
goto GotFloatingPointNumber;
417422
}
418423
} else {
@@ -465,13 +470,24 @@ int __vcscanf(int callback(void *), //
465470
Continue:
466471
continue;
467472
Break:
468-
if (c != -1 && unget) {
469-
unget(c, arg);
470-
}
473+
UNBUFFER;
471474
break;
472475
} while ((c = BUFFER) != -1);
473476
GotFloatingPointNumber:
474-
fp = strtod((char *)fpbuf, NULL);
477+
/* An empty buffer can't be a valid float; don't even bother parsing. */
478+
bool valid = fpbufcur > 0;
479+
if (valid) {
480+
char *ep;
481+
fp = strtod((char *)fpbuf, &ep);
482+
/* We should have parsed the whole buffer. */
483+
valid = ep == (char *)fpbuf + fpbufcur;
484+
}
485+
free(fpbuf);
486+
fpbuf = NULL;
487+
fpbufcur = fpbufsize = 0;
488+
if (!valid) {
489+
goto Done;
490+
}
475491
if (!discard) {
476492
++items;
477493
void *out = va_arg(va, void *);
@@ -481,9 +497,6 @@ int __vcscanf(int callback(void *), //
481497
*(double *)out = (double)fp;
482498
}
483499
}
484-
free(fpbuf);
485-
fpbuf = NULL;
486-
fpbufcur = fpbufsize = 0;
487500
continue;
488501
ReportConsumed:
489502
n_ptr = va_arg(va, int *);
@@ -537,6 +550,11 @@ int __vcscanf(int callback(void *), //
537550
if (!j && c == -1 && !items) {
538551
items = -1;
539552
goto Done;
553+
} else if (rawmode && j != width) {
554+
/* The C standard says that %c "matches a sequence of characters of
555+
* **exactly** the number specified by the field width". If we have
556+
* fewer characters, what we've just read is invalid. */
557+
goto Done;
540558
} else if (!rawmode && j < bufsize) {
541559
if (charbytes == sizeof(char)) {
542560
buf[j] = '\0';

test/libc/stdio/fscanf_test.c

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
2+
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
3+
╞══════════════════════════════════════════════════════════════════════════════╡
4+
│ Copyright 2024 Ivan Komarov │
5+
│ │
6+
│ Permission to use, copy, modify, and/or distribute this software for │
7+
│ any purpose with or without fee is hereby granted, provided that the │
8+
│ above copyright notice and this permission notice appear in all copies. │
9+
│ │
10+
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
11+
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
12+
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
13+
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
14+
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
15+
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
16+
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
17+
│ PERFORMANCE OF THIS SOFTWARE. │
18+
╚─────────────────────────────────────────────────────────────────────────────*/
19+
#include "libc/math.h"
20+
#include "libc/stdio/stdio.h"
21+
#include "libc/testlib/testlib.h"
22+
23+
TEST(fscanf, test_readAfterFloat) {
24+
FILE *f = fmemopen("infDEAD-.125e-2BEEF", 19, "r");
25+
float f1 = 666.666f, f2 = f1;
26+
int i1 = 666, i2 = i1;
27+
EXPECT_EQ(4, fscanf(f, "%f%x%f%x", &f1, &i1, &f2, &i2));
28+
EXPECT_TRUE(isinf(f1));
29+
EXPECT_EQ(0xDEAD, i1);
30+
EXPECT_EQ(-0.125e-2f, f2);
31+
EXPECT_EQ(0xBEEF, i2);
32+
fclose(f);
33+
}

test/libc/stdio/sscanf_test.c

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -69,9 +69,17 @@ TEST(sscanf, testNonDirectiveCharacterMatching) {
6969
}
7070

7171
TEST(sscanf, testCharacter) {
72-
char c = 0;
73-
EXPECT_EQ(1, sscanf("a", "%c", &c));
74-
EXPECT_EQ('a', c);
72+
char c1 = 0, c2 = c1, c3 = c2, c4 = c3;
73+
char s1[32] = {0}, s2[32] = {0};
74+
EXPECT_EQ(1, sscanf("a", "%c", &c1));
75+
EXPECT_EQ(2, sscanf("ab", "%c %c %c", &c2, &c3, &c4));
76+
EXPECT_EQ(1, sscanf("abcde", "%5c", s1));
77+
EXPECT_EQ(0, sscanf("abcd", "%5c", s2));
78+
79+
EXPECT_EQ('a', c1);
80+
EXPECT_EQ('a', c2);
81+
EXPECT_EQ('b', c3);
82+
EXPECT_STREQ("abcde", &s1[0]);
7583
}
7684

7785
TEST(sscanf, testStringBuffer) {
@@ -394,14 +402,28 @@ TEST(sscanf, floating_point_infinity_double_precision) {
394402
EXPECT_TRUE(isinf(g));
395403
}
396404

405+
TEST(sscanf, floating_point_invalid) {
406+
float dummy;
407+
EXPECT_EQ(0, sscanf("junk", "%f", &dummy));
408+
EXPECT_EQ(0, sscanf("e9", "%f", &dummy));
409+
EXPECT_EQ(0, sscanf("-e9", "%f", &dummy));
410+
}
411+
412+
TEST(sscanf, floating_point_invalid_double_precision) {
413+
double dummy;
414+
EXPECT_EQ(0, sscanf("junk", "%lf", &dummy));
415+
EXPECT_EQ(0, sscanf("e9", "%lf", &dummy));
416+
EXPECT_EQ(0, sscanf("-e9", "%lf", &dummy));
417+
}
418+
397419
TEST(sscanf, floating_point_documentation_examples) {
398420
float a = 666.666f, b = a, c = b, d = c, e = d, f = e, g = f, h = g, i = h,
399421
j = i;
400422

401423
EXPECT_EQ(2, sscanf("111.11 -2.22", "%f %f", &a, &b));
402424
EXPECT_EQ(3, sscanf("Nan nan(2) inF", "%f %f %f", &c, &d, &e));
403425
EXPECT_EQ(
404-
5, sscanf("0X1.BC70A3D70A3D7P+6 1.18973e+4932zzz -0.0000000123junk junk",
426+
2, sscanf("0X1.BC70A3D70A3D7P+6 1.18973e+4932zzz -0.0000000123junk junk",
405427
"%f %f %f %f %f", &f, &g, &h, &i, &j));
406428

407429
EXPECT_EQ(111.11f, a);
@@ -411,9 +433,6 @@ TEST(sscanf, floating_point_documentation_examples) {
411433
EXPECT_TRUE(isinf(e));
412434
EXPECT_EQ(0X1.BC70A3D70A3D7P+6f, f);
413435
EXPECT_TRUE(isinf(g));
414-
EXPECT_EQ(-0.0000000123f, h);
415-
EXPECT_EQ(.0f, i);
416-
EXPECT_EQ(.0f, j);
417436
}
418437

419438
TEST(sscanf, floating_point_documentation_examples_double_precision) {
@@ -423,7 +442,7 @@ TEST(sscanf, floating_point_documentation_examples_double_precision) {
423442
EXPECT_EQ(2, sscanf("111.11 -2.22", "%lf %lf", &a, &b));
424443
EXPECT_EQ(3, sscanf("Nan nan(2) inF", "%lf %lf %lf", &c, &d, &e));
425444
EXPECT_EQ(
426-
5, sscanf("0X1.BC70A3D70A3D7P+6 1.18973e+4932zzz -0.0000000123junk junk",
445+
2, sscanf("0X1.BC70A3D70A3D7P+6 1.18973e+4932zzz -0.0000000123junk junk",
427446
"%lf %lf %lf %lf %lf", &f, &g, &h, &i, &j));
428447

429448
EXPECT_EQ(111.11, a);
@@ -433,9 +452,6 @@ TEST(sscanf, floating_point_documentation_examples_double_precision) {
433452
EXPECT_TRUE(isinf(e));
434453
EXPECT_EQ(0X1.BC70A3D70A3D7P+6, f);
435454
EXPECT_TRUE(isinf(g));
436-
EXPECT_EQ(-0.0000000123, h);
437-
EXPECT_EQ(.0, i);
438-
EXPECT_EQ(.0, j);
439455
}
440456

441457
TEST(sscanf, luplus) {

0 commit comments

Comments
 (0)