Skip to content

json start/end position implementation #4517

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 35 commits into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
02f5bca
Add implementation to retrieve start and end positions of json during…
sushshring Oct 28, 2024
311861f
Add more unit tests and add start/stop parsing for arrays
sushshring Nov 5, 2024
226d79b
Merge branch 'nlohmann:develop' into develop
sushshring Nov 6, 2024
b3f6499
Add raw value for all types
sushshring Nov 8, 2024
ab744aa
Merge branch 'develop' of https://github.com/sushshring/json into dev…
sushshring Nov 8, 2024
d321cdb
Add more tests and fix compiler warning
sushshring Nov 8, 2024
64ad6ce
Amalgamate
Nov 13, 2024
e820747
Fix CLang GCC warnings
Nov 13, 2024
3629ceb
Fix error in build
Nov 13, 2024
b42036b
Style using astyle 3.1
Nov 13, 2024
2575678
Fix whitespace changes
Nov 18, 2024
9de6ed1
revert
Nov 18, 2024
3bbca5e
more whitespace reverts
Nov 18, 2024
fa32e81
Address PR comments
Nov 19, 2024
b806d44
Fix failing issues
Nov 19, 2024
7d662ec
More whitespace reverts
Nov 19, 2024
3625875
Address remaining PR comments
Nov 21, 2024
9359441
Address comments
Nov 25, 2024
79e6513
Merge remote-tracking branch 'nlohmann/develop' into develop
Nov 25, 2024
a31d8b8
Switch to using custom base class instead of default basic_json
Nov 27, 2024
1d70d2b
Adding a basic using for a json using the new base class. Also addres…
Dec 4, 2024
814f367
Address decltype comments
Dec 6, 2024
4986e99
Diagnostic positions macro (#4)
sushshring Dec 12, 2024
b96a5d1
Fix missed include deletion
Dec 12, 2024
4406594
Add docs and address other PR comments (#5)
sushshring Dec 13, 2024
8c67186
Address new PR comments and fix CI tests for documentation
Dec 16, 2024
6c04575
Update documentation based on feedback (#6)
sushshring Dec 17, 2024
3d425d6
Merge branch 'develop' into develop
sushshring Dec 17, 2024
94505ba
Address std::size_t and other comments
Dec 17, 2024
556ab6b
Fix new CI issues
Dec 17, 2024
7f599cf
Fix lcov
Dec 17, 2024
5592cb3
Improve lcov case with update to handle_diagnostic_positions call for…
Dec 17, 2024
920e9a7
Fix indentation of LCOV_EXCL_STOP comments
sushshring Dec 18, 2024
aa14b15
fix amalgamation astyle issue
Dec 18, 2024
c4d1091
Merge remote-tracking branch 'nlohmann/develop' into develop
Dec 18, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion include/nlohmann/detail/input/binary_reader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ static inline bool little_endianness(int num = 1) noexcept
/*!
@brief deserialization of CBOR, MessagePack, and UBJSON values
*/
template<typename BasicJsonType, typename InputAdapterType, typename SAX = json_sax_dom_parser<BasicJsonType>>
template<typename BasicJsonType, typename InputAdapterType, typename SAX = json_sax_dom_parser<BasicJsonType, InputAdapterType>>
class binary_reader
{
using number_integer_t = typename BasicJsonType::number_integer_t;
Expand Down
197 changes: 186 additions & 11 deletions include/nlohmann/detail/input/json_sax.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#include <nlohmann/detail/exceptions.hpp>
#include <nlohmann/detail/macro_scope.hpp>
#include <nlohmann/detail/string_concat.hpp>

#include <nlohmann/detail/input/lexer.hpp>
NLOHMANN_JSON_NAMESPACE_BEGIN

/*!
Expand Down Expand Up @@ -157,7 +157,7 @@ constructor contains the parsed value.

@tparam BasicJsonType the JSON type
@tparam InputAdapterType the input adapter type of the lexer used to obtain positions
*/
template<typename BasicJsonType>
template<typename BasicJsonType, typename InputAdapterType>
class json_sax_dom_parser
{
public:
Expand All @@ -166,14 +166,15 @@ class json_sax_dom_parser
using number_float_t = typename BasicJsonType::number_float_t;
using string_t = typename BasicJsonType::string_t;
using binary_t = typename BasicJsonType::binary_t;
using lexer_t = lexer<BasicJsonType, InputAdapterType>;

/*!
@param[in,out] r reference to a JSON value that is manipulated while
parsing
@param[in] allow_exceptions_ whether parse errors yield exceptions
*/
explicit json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true)
: root(r), allow_exceptions(allow_exceptions_)
explicit json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true, lexer_t* lexer_ = nullptr)
: root(r), allow_exceptions(allow_exceptions_), m_lexer_ref(lexer_)
{}

// make class move-only
Expand Down Expand Up @@ -229,6 +230,13 @@ class json_sax_dom_parser
{
ref_stack.push_back(handle_value(BasicJsonType::value_t::object));

// Manually set the start position of the object here.
// Ensure this is after the call to handle_value to ensure correct start position.
if (m_lexer_ref)
{
ref_stack.back()->start_position = m_lexer_ref->get_position() - 1;
}

if (JSON_HEDLEY_UNLIKELY(len != static_cast<std::size_t>(-1) && len > ref_stack.back()->max_size()))
{
JSON_THROW(out_of_range::create(408, concat("excessive object size: ", std::to_string(len)), ref_stack.back()));
Expand All @@ -252,6 +260,12 @@ class json_sax_dom_parser
JSON_ASSERT(!ref_stack.empty());
JSON_ASSERT(ref_stack.back()->is_object());

if (m_lexer_ref)
{
// set end position of the object (inclusive)
ref_stack.back()->end_position = m_lexer_ref->get_position();
}

ref_stack.back()->set_parents();
ref_stack.pop_back();
return true;
Expand All @@ -261,6 +275,13 @@ class json_sax_dom_parser
{
ref_stack.push_back(handle_value(BasicJsonType::value_t::array));

// Manually set the start position of the array here.
// Ensure this is after the call to handle_value to ensure correct start position.
if (m_lexer_ref)
{
ref_stack.back()->start_position = m_lexer_ref->get_position() - 1;
}

if (JSON_HEDLEY_UNLIKELY(len != static_cast<std::size_t>(-1) && len > ref_stack.back()->max_size()))
{
JSON_THROW(out_of_range::create(408, concat("excessive array size: ", std::to_string(len)), ref_stack.back()));
Expand All @@ -274,6 +295,12 @@ class json_sax_dom_parser
JSON_ASSERT(!ref_stack.empty());
JSON_ASSERT(ref_stack.back()->is_array());

if (m_lexer_ref)
{
// set end position of the array (inclusive)
ref_stack.back()->end_position = m_lexer_ref->get_position();
}

ref_stack.back()->set_parents();
ref_stack.pop_back();
return true;
Expand All @@ -298,6 +325,62 @@ class json_sax_dom_parser
}

private:

/*!
@brief record start and end positions on a freshly created JSON value

Does nothing if no lexer was supplied to this parser. Otherwise, the lexer's
current position is stored as the end position of @a v, and the start position
is derived from it depending on the type of @a v.

@param[in,out] v  value whose start_position and end_position are written
*/
void set_start_end_pos_for_json_value(BasicJsonType& v)
{
    // without a lexer there is no position information to record
    if (!m_lexer_ref)
    {
        return;
    }

    // the end position is taken from the lexer for every type; the cases
    // below only compute the matching start position (or reset both)
    v.end_position = m_lexer_ref->get_position();

    switch (v.type())
    {
        case value_t::object:
        case value_t::array:
        {
            // the start position of containers is set by the start_object()
            // and start_array() handlers, so nothing is computed here
            break;
        }

        case value_t::null:
        {
            // 4 is the length of the literal "null"
            v.start_position = v.end_position - 4;
            break;
        }

        case value_t::boolean:
        {
            // 4 and 5 are the lengths of the literals "true" and "false"
            if (v.m_data.m_value.boolean)
            {
                v.start_position = v.end_position - 4;
            }
            else
            {
                v.start_position = v.end_position - 5;
            }
            break;
        }

        case value_t::string:
        {
            // stored string length plus the two enclosing quotes
            // NOTE(review): this presumably assumes the stored string is as long
            // as its text in the input; confirm for strings with escape sequences
            const auto quoted_length = v.m_data.m_value.string->size() + 2;
            v.start_position = v.end_position - quoted_length;
            break;
        }

        case value_t::binary:
        case value_t::number_integer:
        case value_t::number_unsigned:
        case value_t::number_float:
        {
            // length of the string currently held by the lexer
            // (presumably the text of the token just read - TODO confirm)
            const auto token_length = m_lexer_ref->get_string().size();
            v.start_position = v.end_position - token_length;
            break;
        }

        case value_t::discarded:
        {
            // discarded values carry no position: both ends are set to npos
            v.start_position = v.end_position = std::string::npos;
            break;
        }

        default:
        {
            // every value type is handled explicitly above; this must not be reached
            JSON_ASSERT(false);
        }
    }
}
/*!
@invariant If the ref stack is empty, then the passed value will be the new
root.
Expand All @@ -311,6 +394,7 @@ class json_sax_dom_parser
if (ref_stack.empty())
{
root = BasicJsonType(std::forward<Value>(v));
set_start_end_pos_for_json_value(root);
return &root;
}

Expand All @@ -319,12 +403,14 @@ class json_sax_dom_parser
if (ref_stack.back()->is_array())
{
ref_stack.back()->m_data.m_value.array->emplace_back(std::forward<Value>(v));
set_start_end_pos_for_json_value(ref_stack.back()->m_data.m_value.array->back());
return &(ref_stack.back()->m_data.m_value.array->back());
}

JSON_ASSERT(ref_stack.back()->is_object());
JSON_ASSERT(object_element);
*object_element = BasicJsonType(std::forward<Value>(v));
set_start_end_pos_for_json_value(*object_element);
return object_element;
}

Expand All @@ -338,9 +424,11 @@ class json_sax_dom_parser
bool errored = false;
/// whether to throw exceptions in case of errors
const bool allow_exceptions = true;
/// the lexer reference to obtain the current position
lexer_t* m_lexer_ref = nullptr;
};

template<typename BasicJsonType>
template<typename BasicJsonType, typename InputAdapterType>
class json_sax_dom_callback_parser
{
public:
Expand All @@ -351,11 +439,13 @@ class json_sax_dom_callback_parser
using binary_t = typename BasicJsonType::binary_t;
using parser_callback_t = typename BasicJsonType::parser_callback_t;
using parse_event_t = typename BasicJsonType::parse_event_t;
using lexer_t = lexer<BasicJsonType, InputAdapterType>;

json_sax_dom_callback_parser(BasicJsonType& r,
parser_callback_t cb,
const bool allow_exceptions_ = true)
: root(r), callback(std::move(cb)), allow_exceptions(allow_exceptions_)
const parser_callback_t cb,
const bool allow_exceptions_ = true,
lexer_t* lexer_ = nullptr)
: root(r), callback(cb), allow_exceptions(allow_exceptions_), m_lexer_ref(lexer_)
{
keep_stack.push_back(true);
}
Expand Down Expand Up @@ -418,6 +508,13 @@ class json_sax_dom_callback_parser
auto val = handle_value(BasicJsonType::value_t::object, true);
ref_stack.push_back(val.second);

// Manually set the start position of the object here.
// Ensure this is after the call to handle_value to ensure correct start position.
if (m_lexer_ref && ref_stack.back())
{
ref_stack.back()->start_position = m_lexer_ref->get_position() - 1;
}

// check object limit
if (ref_stack.back() && JSON_HEDLEY_UNLIKELY(len != static_cast<std::size_t>(-1) && len > ref_stack.back()->max_size()))
{
Expand Down Expand Up @@ -455,6 +552,10 @@ class json_sax_dom_callback_parser
}
else
{
if (m_lexer_ref)
{
ref_stack.back()->end_position = m_lexer_ref->get_position();
}
ref_stack.back()->set_parents();
}
}
Expand Down Expand Up @@ -488,10 +589,20 @@ class json_sax_dom_callback_parser
auto val = handle_value(BasicJsonType::value_t::array, true);
ref_stack.push_back(val.second);

// check array limit
if (ref_stack.back() && JSON_HEDLEY_UNLIKELY(len != static_cast<std::size_t>(-1) && len > ref_stack.back()->max_size()))
if (ref_stack.back())
{
JSON_THROW(out_of_range::create(408, concat("excessive array size: ", std::to_string(len)), ref_stack.back()));
// Manually set the start position of the array here.
// Ensure this is after the call to handle_value to ensure correct start position.
if (m_lexer_ref)
{
ref_stack.back()->start_position = m_lexer_ref->get_position() - 1;
}

// check array limit
if (JSON_HEDLEY_UNLIKELY(len != static_cast<std::size_t>(-1) && len > ref_stack.back()->max_size()))
{
JSON_THROW(out_of_range::create(408, concat("excessive array size: ", std::to_string(len)), ref_stack.back()));
}
}

return true;
Expand All @@ -506,6 +617,10 @@ class json_sax_dom_callback_parser
keep = callback(static_cast<int>(ref_stack.size()) - 1, parse_event_t::array_end, *ref_stack.back());
if (keep)
{
if (m_lexer_ref)
{
ref_stack.back()->end_position = m_lexer_ref->get_position();
}
ref_stack.back()->set_parents();
}
else
Expand Down Expand Up @@ -548,6 +663,63 @@ class json_sax_dom_callback_parser
}

private:

/*!
@brief record start and end positions on a freshly created JSON value

Does nothing if no lexer was supplied to this parser. Otherwise, the lexer's
current position is stored as the end position of @a v, and the start position
is derived from it depending on the type of @a v.

@tparam Value  type of the value to annotate; it must provide type(),
               m_data, start_position and end_position
@param[in,out] v  value whose start_position and end_position are written
*/
template<typename Value>
void set_start_end_pos_for_json_value(Value& v)
{
    // without a lexer there is no position information to record
    if (!m_lexer_ref)
    {
        return;
    }

    // the end position is taken from the lexer for every type; the cases
    // below only compute the matching start position (or reset both)
    v.end_position = m_lexer_ref->get_position();

    switch (v.type())
    {
        case value_t::object:
        case value_t::array:
        {
            // the start position of containers is set by the start_object()
            // and start_array() handlers, so nothing is computed here
            break;
        }

        case value_t::null:
        {
            // 4 is the length of the literal "null"
            v.start_position = v.end_position - 4;
            break;
        }

        case value_t::boolean:
        {
            // 4 and 5 are the lengths of the literals "true" and "false"
            if (v.m_data.m_value.boolean)
            {
                v.start_position = v.end_position - 4;
            }
            else
            {
                v.start_position = v.end_position - 5;
            }
            break;
        }

        case value_t::string:
        {
            // stored string length plus the two enclosing quotes
            // NOTE(review): this presumably assumes the stored string is as long
            // as its text in the input; confirm for strings with escape sequences
            const auto quoted_length = v.m_data.m_value.string->size() + 2;
            v.start_position = v.end_position - quoted_length;
            break;
        }

        case value_t::binary:
        case value_t::number_integer:
        case value_t::number_unsigned:
        case value_t::number_float:
        {
            // length of the string currently held by the lexer
            // (presumably the text of the token just read - TODO confirm)
            const auto token_length = m_lexer_ref->get_string().size();
            v.start_position = v.end_position - token_length;
            break;
        }

        case value_t::discarded:
        {
            // discarded values carry no position: both ends are set to npos
            v.start_position = v.end_position = std::string::npos;
            break;
        }

        default:
        {
            // every value type is handled explicitly above; this must not be reached
            JSON_ASSERT(false);
        }
    }
}
/*!
@param[in] v value to add to the JSON value we build during parsing
@param[in] skip_callback whether we should skip calling the callback
Expand Down Expand Up @@ -577,6 +749,7 @@ class json_sax_dom_callback_parser

// create value
auto value = BasicJsonType(std::forward<Value>(v));
set_start_end_pos_for_json_value(value);

// check callback
const bool keep = skip_callback || callback(static_cast<int>(ref_stack.size()), parse_event_t::value, value);
Expand Down Expand Up @@ -645,6 +818,8 @@ class json_sax_dom_callback_parser
const bool allow_exceptions = true;
/// a discarded value for the callback
BasicJsonType discarded = BasicJsonType::value_t::discarded;
/// the lexer reference to obtain the current position
lexer_t* m_lexer_ref = nullptr;
};

template<typename BasicJsonType>
Expand Down
4 changes: 2 additions & 2 deletions include/nlohmann/detail/input/parser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ class parser
{
if (callback)
{
json_sax_dom_callback_parser<BasicJsonType> sdp(result, callback, allow_exceptions);
json_sax_dom_callback_parser<BasicJsonType, InputAdapterType> sdp(result, callback, allow_exceptions, &m_lexer);
sax_parse_internal(&sdp);

// in strict mode, input must be completely read
Expand Down Expand Up @@ -122,7 +122,7 @@ class parser
}
else
{
json_sax_dom_parser<BasicJsonType> sdp(result, allow_exceptions);
json_sax_dom_parser<BasicJsonType, InputAdapterType> sdp(result, allow_exceptions, &m_lexer);
sax_parse_internal(&sdp);

// in strict mode, input must be completely read
Expand Down
Loading
Loading