Skip to content

Commit 935c6ee

Browse files
authored
Optimize binary get_number implementation by reading multiple bytes at once (#4391)
* multibyte binary reader
* wide_string_input_adapter fallback to get_character
  Update input_adapters.hpp
* Update json.hpp
* Add from msgpack test
* Test for broken msgpack with stream, address some warnings
* Reading binary number from wchar as an error, address warnings
* Not casting float to int, it violates strict aliasing rule
1 parent e41905f commit 935c6ee

File tree

5 files changed

+258
-42
lines changed

5 files changed

+258
-42
lines changed

include/nlohmann/detail/input/binary_reader.hpp

Lines changed: 56 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,9 @@
2020
#include <string> // char_traits, string
2121
#include <utility> // make_pair, move
2222
#include <vector> // vector
23+
#ifdef __cpp_lib_byteswap
24+
#include <bit> //byteswap
25+
#endif
2326

2427
#include <nlohmann/detail/exceptions.hpp>
2528
#include <nlohmann/detail/input/input_adapters.hpp>
@@ -2754,6 +2757,29 @@ class binary_reader
27542757
return current = ia.get_character();
27552758
}
27562759

2760+
/*!
2761+
@brief get_to read into a primitive type
2762+
2763+
This function provides the interface to the used input adapter. It does
2764+
not throw in case the input reached EOF, but returns false instead
2765+
2766+
@return bool, whether the read was successful
2767+
*/
2768+
template<class T>
2769+
bool get_to(T& dest, const input_format_t format, const char* context)
2770+
{
2771+
auto new_chars_read = ia.get_elements(&dest);
2772+
chars_read += new_chars_read;
2773+
if (JSON_HEDLEY_UNLIKELY(new_chars_read < sizeof(T)))
2774+
{
2775+
// in case of failure, advance position by 1 to report failing location
2776+
++chars_read;
2777+
sax->parse_error(chars_read, "<end of file>", parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context), nullptr));
2778+
return false;
2779+
}
2780+
return true;
2781+
}
2782+
27572783
/*!
27582784
@return character read from the input after ignoring all 'N' entries
27592785
*/
@@ -2768,6 +2794,28 @@ class binary_reader
27682794
return current;
27692795
}
27702796

2797+
template<class NumberType>
2798+
static void byte_swap(NumberType& number)
2799+
{
2800+
constexpr std::size_t sz = sizeof(number);
2801+
#ifdef __cpp_lib_byteswap
2802+
if constexpr (sz == 1)
2803+
{
2804+
return;
2805+
}
2806+
if constexpr(std::is_integral_v<NumberType>)
2807+
{
2808+
number = std::byteswap(number);
2809+
return;
2810+
}
2811+
#endif
2812+
auto* ptr = reinterpret_cast<std::uint8_t*>(&number);
2813+
for (std::size_t i = 0; i < sz / 2; ++i)
2814+
{
2815+
std::swap(ptr[i], ptr[sz - i - 1]);
2816+
}
2817+
}
2818+
27712819
/*
27722820
@brief read a number from the input
27732821
@@ -2786,29 +2834,16 @@ class binary_reader
27862834
template<typename NumberType, bool InputIsLittleEndian = false>
27872835
bool get_number(const input_format_t format, NumberType& result)
27882836
{
2789-
// step 1: read input into array with system's byte order
2790-
std::array<std::uint8_t, sizeof(NumberType)> vec{};
2791-
for (std::size_t i = 0; i < sizeof(NumberType); ++i)
2792-
{
2793-
get();
2794-
if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "number")))
2795-
{
2796-
return false;
2797-
}
2837+
// read in the original format
27982838

2799-
// reverse byte order prior to conversion if necessary
2800-
if (is_little_endian != (InputIsLittleEndian || format == input_format_t::bjdata))
2801-
{
2802-
vec[sizeof(NumberType) - i - 1] = static_cast<std::uint8_t>(current);
2803-
}
2804-
else
2805-
{
2806-
vec[i] = static_cast<std::uint8_t>(current); // LCOV_EXCL_LINE
2807-
}
2839+
if (JSON_HEDLEY_UNLIKELY(!get_to(result, format, "number")))
2840+
{
2841+
return false;
2842+
}
2843+
if (is_little_endian != (InputIsLittleEndian || format == input_format_t::bjdata))
2844+
{
2845+
byte_swap(result);
28082846
}
2809-
2810-
// step 2: convert array into number of type T and return
2811-
std::memcpy(&result, vec.data(), sizeof(NumberType));
28122847
return true;
28132848
}
28142849

include/nlohmann/detail/input/input_adapters.hpp

Lines changed: 45 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -68,6 +68,13 @@ class file_input_adapter
6868
return std::fgetc(m_file);
6969
}
7070

71+
// returns the number of characters successfully read
72+
template<class T>
73+
std::size_t get_elements(T* dest, std::size_t count = 1)
74+
{
75+
return fread(dest, 1, sizeof(T) * count, m_file);
76+
}
77+
7178
private:
7279
/// the file pointer to read from
7380
std::FILE* m_file;
@@ -127,6 +134,17 @@ class input_stream_adapter
127134
return res;
128135
}
129136

137+
template<class T>
138+
std::size_t get_elements(T* dest, std::size_t count = 1)
139+
{
140+
auto res = static_cast<std::size_t>(sb->sgetn(reinterpret_cast<char*>(dest), static_cast<std::streamsize>(count * sizeof(T))));
141+
if (JSON_HEDLEY_UNLIKELY(res < count * sizeof(T)))
142+
{
143+
is->clear(is->rdstate() | std::ios::eofbit);
144+
}
145+
return res;
146+
}
147+
130148
private:
131149
/// the associated input stream
132150
std::istream* is = nullptr;
@@ -158,6 +176,26 @@ class iterator_input_adapter
158176
return char_traits<char_type>::eof();
159177
}
160178

179+
// for general iterators, we cannot really do something better than falling back to processing the range one-by-one
180+
template<class T>
181+
std::size_t get_elements(T* dest, std::size_t count = 1)
182+
{
183+
auto* ptr = reinterpret_cast<char*>(dest);
184+
for (std::size_t read_index = 0; read_index < count * sizeof(T); ++read_index)
185+
{
186+
if (JSON_HEDLEY_LIKELY(current != end))
187+
{
188+
ptr[read_index] = static_cast<char>(*current);
189+
std::advance(current, 1);
190+
}
191+
else
192+
{
193+
return read_index;
194+
}
195+
}
196+
return count * sizeof(T);
197+
}
198+
161199
private:
162200
IteratorType current;
163201
IteratorType end;
@@ -321,6 +359,13 @@ class wide_string_input_adapter
321359
return utf8_bytes[utf8_bytes_index++];
322360
}
323361

362+
// parsing binary with wchar doesn't make sense, but since the parsing mode can be runtime, we need something here
363+
template<class T>
364+
std::size_t get_elements(T* /*dest*/, std::size_t /*count*/ = 1)
365+
{
366+
JSON_THROW(parse_error::create(112, 1, "wide string type cannot be interpreted as binary data", nullptr));
367+
}
368+
324369
private:
325370
BaseInputAdapter base_adapter;
326371

single_include/nlohmann/json.hpp

Lines changed: 101 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -6220,6 +6220,9 @@ NLOHMANN_JSON_NAMESPACE_END
62206220
#include <string> // char_traits, string
62216221
#include <utility> // make_pair, move
62226222
#include <vector> // vector
6223+
#ifdef __cpp_lib_byteswap
6224+
#include <bit> //byteswap
6225+
#endif
62236226

62246227
// #include <nlohmann/detail/exceptions.hpp>
62256228

@@ -6298,6 +6301,13 @@ class file_input_adapter
62986301
return std::fgetc(m_file);
62996302
}
63006303

6304+
// returns the number of characters successfully read
6305+
template<class T>
6306+
std::size_t get_elements(T* dest, std::size_t count = 1)
6307+
{
6308+
return fread(dest, 1, sizeof(T) * count, m_file);
6309+
}
6310+
63016311
private:
63026312
/// the file pointer to read from
63036313
std::FILE* m_file;
@@ -6357,6 +6367,17 @@ class input_stream_adapter
63576367
return res;
63586368
}
63596369

6370+
template<class T>
6371+
std::size_t get_elements(T* dest, std::size_t count = 1)
6372+
{
6373+
auto res = static_cast<std::size_t>(sb->sgetn(reinterpret_cast<char*>(dest), static_cast<std::streamsize>(count * sizeof(T))));
6374+
if (JSON_HEDLEY_UNLIKELY(res < count * sizeof(T)))
6375+
{
6376+
is->clear(is->rdstate() | std::ios::eofbit);
6377+
}
6378+
return res;
6379+
}
6380+
63606381
private:
63616382
/// the associated input stream
63626383
std::istream* is = nullptr;
@@ -6388,6 +6409,26 @@ class iterator_input_adapter
63886409
return char_traits<char_type>::eof();
63896410
}
63906411

6412+
// for general iterators, we cannot really do something better than falling back to processing the range one-by-one
6413+
template<class T>
6414+
std::size_t get_elements(T* dest, std::size_t count = 1)
6415+
{
6416+
auto* ptr = reinterpret_cast<char*>(dest);
6417+
for (std::size_t read_index = 0; read_index < count * sizeof(T); ++read_index)
6418+
{
6419+
if (JSON_HEDLEY_LIKELY(current != end))
6420+
{
6421+
ptr[read_index] = static_cast<char>(*current);
6422+
std::advance(current, 1);
6423+
}
6424+
else
6425+
{
6426+
return read_index;
6427+
}
6428+
}
6429+
return count * sizeof(T);
6430+
}
6431+
63916432
private:
63926433
IteratorType current;
63936434
IteratorType end;
@@ -6551,6 +6592,13 @@ class wide_string_input_adapter
65516592
return utf8_bytes[utf8_bytes_index++];
65526593
}
65536594

6595+
// parsing binary with wchar doesn't make sense, but since the parsing mode can be runtime, we need something here
6596+
template<class T>
6597+
std::size_t get_elements(T* /*dest*/, std::size_t /*count*/ = 1)
6598+
{
6599+
JSON_THROW(parse_error::create(112, 1, "wide string type cannot be interpreted as binary data", nullptr));
6600+
}
6601+
65546602
private:
65556603
BaseInputAdapter base_adapter;
65566604

@@ -12007,6 +12055,29 @@ class binary_reader
1200712055
return current = ia.get_character();
1200812056
}
1200912057

12058+
/*!
12059+
@brief get_to read into a primitive type
12060+
12061+
This function provides the interface to the used input adapter. It does
12062+
not throw in case the input reached EOF, but returns false instead
12063+
12064+
@return bool, whether the read was successful
12065+
*/
12066+
template<class T>
12067+
bool get_to(T& dest, const input_format_t format, const char* context)
12068+
{
12069+
auto new_chars_read = ia.get_elements(&dest);
12070+
chars_read += new_chars_read;
12071+
if (JSON_HEDLEY_UNLIKELY(new_chars_read < sizeof(T)))
12072+
{
12073+
// in case of failure, advance position by 1 to report failing location
12074+
++chars_read;
12075+
sax->parse_error(chars_read, "<end of file>", parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context), nullptr));
12076+
return false;
12077+
}
12078+
return true;
12079+
}
12080+
1201012081
/*!
1201112082
@return character read from the input after ignoring all 'N' entries
1201212083
*/
@@ -12021,6 +12092,28 @@ class binary_reader
1202112092
return current;
1202212093
}
1202312094

12095+
template<class NumberType>
12096+
static void byte_swap(NumberType& number)
12097+
{
12098+
constexpr std::size_t sz = sizeof(number);
12099+
#ifdef __cpp_lib_byteswap
12100+
if constexpr (sz == 1)
12101+
{
12102+
return;
12103+
}
12104+
if constexpr(std::is_integral_v<NumberType>)
12105+
{
12106+
number = std::byteswap(number);
12107+
return;
12108+
}
12109+
#endif
12110+
auto* ptr = reinterpret_cast<std::uint8_t*>(&number);
12111+
for (std::size_t i = 0; i < sz / 2; ++i)
12112+
{
12113+
std::swap(ptr[i], ptr[sz - i - 1]);
12114+
}
12115+
}
12116+
1202412117
/*
1202512118
@brief read a number from the input
1202612119

@@ -12039,29 +12132,16 @@ class binary_reader
1203912132
template<typename NumberType, bool InputIsLittleEndian = false>
1204012133
bool get_number(const input_format_t format, NumberType& result)
1204112134
{
12042-
// step 1: read input into array with system's byte order
12043-
std::array<std::uint8_t, sizeof(NumberType)> vec{};
12044-
for (std::size_t i = 0; i < sizeof(NumberType); ++i)
12045-
{
12046-
get();
12047-
if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "number")))
12048-
{
12049-
return false;
12050-
}
12135+
// read in the original format
1205112136

12052-
// reverse byte order prior to conversion if necessary
12053-
if (is_little_endian != (InputIsLittleEndian || format == input_format_t::bjdata))
12054-
{
12055-
vec[sizeof(NumberType) - i - 1] = static_cast<std::uint8_t>(current);
12056-
}
12057-
else
12058-
{
12059-
vec[i] = static_cast<std::uint8_t>(current); // LCOV_EXCL_LINE
12060-
}
12137+
if (JSON_HEDLEY_UNLIKELY(!get_to(result, format, "number")))
12138+
{
12139+
return false;
12140+
}
12141+
if (is_little_endian != (InputIsLittleEndian || format == input_format_t::bjdata))
12142+
{
12143+
byte_swap(result);
1206112144
}
12062-
12063-
// step 2: convert array into number of type T and return
12064-
std::memcpy(&result, vec.data(), sizeof(NumberType));
1206512145
return true;
1206612146
}
1206712147

0 commit comments

Comments
 (0)