Skip to content

Commit d747f65

Browse files
authored
Merge pull request #11092 from weirdan/fix-invalid-utf8-output-in-json-reports
Fix JSON formatter crashes with invalid UTF in error messages
2 parents 4787eaf + bb57f39 commit d747f65

File tree

2 files changed

+73
-2
lines changed

2 files changed

+73
-2
lines changed

src/Psalm/Internal/Json/Json.php

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,12 @@
44

55
use RuntimeException;
66

7+
use function array_walk_recursive;
8+
use function bin2hex;
9+
use function is_string;
710
use function json_encode;
811
use function json_last_error_msg;
12+
use function preg_replace_callback;
913

1014
use const JSON_PRETTY_PRINT;
1115
use const JSON_UNESCAPED_SLASHES;
@@ -19,28 +23,80 @@
1923
final class Json
{
    public const PRETTY = JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE;

    /**
     * Byte-level pattern matching sequences that are not well-formed UTF-8;
     * each alternative is annotated inline. It is deliberately used without
     * the `u` modifier so that the subject is scanned byte by byte.
     *
     * from https://stackoverflow.com/a/11709412
     */
    private const INVALID_UTF_REGEXP = <<<'EOF'
        /(
            [\xC0-\xC1] # Invalid UTF-8 Bytes
            | [\xF5-\xFF] # Invalid UTF-8 Bytes
            | \xE0[\x80-\x9F] # Overlong encoding of prior code point
            | \xF0[\x80-\x8F] # Overlong encoding of prior code point
            | [\xC2-\xDF](?![\x80-\xBF]) # Invalid UTF-8 Sequence Start
            | [\xE0-\xEF](?![\x80-\xBF]{2}) # Invalid UTF-8 Sequence Start
            | [\xF0-\xF4](?![\x80-\xBF]{3}) # Invalid UTF-8 Sequence Start
            | (?<=[\x00-\x7F\xF5-\xFF])[\x80-\xBF] # Invalid UTF-8 Sequence Middle
            | (?<!
                [\xC2-\xDF]
                |[\xE0-\xEF]
                |[\xE0-\xEF][\x80-\xBF]
                |[\xF0-\xF4]
                |[\xF0-\xF4][\x80-\xBF]
                |[\xF0-\xF4][\x80-\xBF]{2}
            )[\x80-\xBF] # Overlong Sequence
            | (?<=[\xE0-\xEF])[\x80-\xBF](?![\x80-\xBF]) # Short 3 byte sequence
            | (?<=[\xF0-\xF4])[\x80-\xBF](?![\x80-\xBF]{2}) # Short 4 byte sequence
            | (?<=[\xF0-\xF4][\x80-\xBF])[\x80-\xBF](?![\x80-\xBF]) # Short 4 byte sequence (2)
        )/x
        EOF;

    /**
     * @var int
     */
    public const DEFAULT = 0;

    /**
     * Encodes $data as a JSON string.
     *
     * The data is first encoded as-is. If that fails, string values are passed
     * through self::scrub() (which replaces invalid UTF-8 bytes with a visible
     * placeholder) and encoding is attempted once more.
     *
     * @param array<array-key, mixed> $data
     * @param int|null $options json_encode() flags; null selects self::DEFAULT
     * @throws RuntimeException when the data cannot be encoded even after scrubbing
     * @psalm-pure
     */
    public static function encode(array $data, ?int $options = null): string
    {
        $options ??= self::DEFAULT;

        $result = json_encode($data, $options);

        // json_encode() signals failure with boolean false; compare strictly so
        // that only a real failure triggers the scrub-and-retry path.
        if ($result === false) {
            $result = json_encode(self::scrub($data), $options);
        }

        if ($result === false) {
            /** @psalm-suppress ImpureFunctionCall */
            throw new RuntimeException('Cannot create JSON string: '.json_last_error_msg());
        }

        return $result;
    }

    /**
     * Returns a copy of $data in which every string leaf has its invalid UTF-8
     * bytes replaced by "<Invalid UTF-8: 0x..>" markers (hex of the matched bytes).
     * Non-string leaves and array keys are left as they are.
     *
     * @param array<array-key, mixed> $data
     * @return array<array-key, mixed>
     * @psalm-pure
     */
    private static function scrub(array $data): array
    {
        /** @psalm-suppress ImpureFunctionCall */
        array_walk_recursive(
            $data,
            /**
             * @psalm-pure
             * @param mixed $value
             */
            static function (&$value): void {
                if (!is_string($value)) {
                    return;
                }

                // preg_replace_callback() returns null on a PCRE error. Keep the
                // original string in that case so the value is not silently
                // turned into null; the caller's retry will then fail loudly.
                $value = preg_replace_callback(
                    self::INVALID_UTF_REGEXP,
                    static fn(array $matches): string => '<Invalid UTF-8: 0x' . bin2hex($matches[0] ?? '') . '>',
                    $value,
                ) ?? $value;
            },
        );

        return $data;
    }
}

tests/Internal/JsonTest.php

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
<?php
2+
3+
namespace Psalm\Tests\Internal;
4+
5+
use Psalm\Internal\Json\Json;
6+
use Psalm\Tests\TestCase;
7+
8+
/**
 * Tests for Json::encode() handling of strings that are not valid UTF-8.
 */
final class JsonTest extends TestCase
{
    /**
     * A lone lead byte must be replaced by a placeholder instead of making
     * encoding fail.
     */
    public function testConvertsInvalidUtf(): void
    {
        $invalidUtf = "\xd1"; // incomplete sequence like "ы"[0]

        // assertSame: the encoded output is a string and must match exactly.
        $this->assertSame('{"data":"<Invalid UTF-8: 0xd1>"}', Json::encode(['data' => $invalidUtf]));
    }

    /**
     * Well-formed multi-byte input is encoded normally, with no placeholder.
     */
    public function testLeavesValidUtfUntouched(): void
    {
        $validUtf = "\xd1\x8b"; // complete two-byte sequence ("ы")

        $this->assertSame('{"data":"\u044b"}', Json::encode(['data' => $validUtf]));
    }
}

0 commit comments

Comments
 (0)