Skip to content

Commit ea8b03d

Browse files
committed
multibyte binary reader
1 parent 8c391e0 commit ea8b03d

File tree

3 files changed

+202
-42
lines changed

3 files changed

+202
-42
lines changed

include/nlohmann/detail/input/binary_reader.hpp

Lines changed: 56 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@
2020
#include <string> // char_traits, string
2121
#include <utility> // make_pair, move
2222
#include <vector> // vector
23+
#ifdef __cpp_lib_byteswap
24+
#include <bit> //byteswap
25+
#endif
2326

2427
#include <nlohmann/detail/exceptions.hpp>
2528
#include <nlohmann/detail/input/input_adapters.hpp>
@@ -2754,6 +2757,29 @@ class binary_reader
27542757
return current = ia.get_character();
27552758
}
27562759

2760+
/*!
2761+
@brief get_to read into a primitive type
2762+
2763+
This function provides the interface to the used input adapter. It does
2764+
not throw in case the input reached EOF, but returns false instead
2765+
2766+
@return bool, whether the read was successful
2767+
*/
2768+
template<class T>
2769+
bool get_to(T& dest, const input_format_t format, const char* context)
2770+
{
2771+
auto new_chars_read = ia.get_elements(&dest);
2772+
chars_read += new_chars_read;
2773+
if (JSON_HEDLEY_UNLIKELY(new_chars_read < sizeof(T)))
2774+
{
2775+
// in case of failure, advance position by 1 to report failing location
2776+
++chars_read;
2777+
sax->parse_error(chars_read, "<end of file>", parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context), nullptr));
2778+
return false;
2779+
}
2780+
return true;
2781+
}
2782+
27572783
/*!
27582784
@return character read from the input after ignoring all 'N' entries
27592785
*/
@@ -2768,6 +2794,28 @@ class binary_reader
27682794
return current;
27692795
}
27702796

2797+
/*!
@brief reverse the byte order of a number in place

Multi-byte integral types are swapped with `std::byteswap` when the standard
library provides it. Every other type (floating-point types in particular) is
reversed byte by byte, because `std::byteswap` only accepts integer types and
accessing a float through a reference to an unrelated integer type would
violate the aliasing rules. Single-byte types are left unchanged.

@tparam NumberType  type of the number; it is treated as a plain sequence of bytes

@param[in,out] number  value whose bytes are reversed
*/
template<class NumberType>
static void byte_swap(NumberType& number)
{
    constexpr std::size_t sz = sizeof(number);
#ifdef __cpp_lib_byteswap
    if constexpr (std::is_integral<NumberType>::value && sz > 1)
    {
        number = std::byteswap(number);
    }
    else
#endif
    {
        // byte-wise access is valid for any object type; for sz == 1 the loop does not run
        auto* bytes = reinterpret_cast<std::uint8_t*>(&number);
        for (std::size_t i = 0; i < sz / 2; ++i)
        {
            std::swap(bytes[i], bytes[sz - i - 1]);
        }
    }
}
2818+
27712819
/*
27722820
@brief read a number from the input
27732821
@@ -2786,29 +2834,16 @@ class binary_reader
27862834
template<typename NumberType, bool InputIsLittleEndian = false>
27872835
bool get_number(const input_format_t format, NumberType& result)
27882836
{
2789-
// step 1: read input into array with system's byte order
2790-
std::array<std::uint8_t, sizeof(NumberType)> vec{};
2791-
for (std::size_t i = 0; i < sizeof(NumberType); ++i)
2792-
{
2793-
get();
2794-
if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "number")))
2795-
{
2796-
return false;
2797-
}
2837+
// read in the original format
27982838

2799-
// reverse byte order prior to conversion if necessary
2800-
if (is_little_endian != (InputIsLittleEndian || format == input_format_t::bjdata))
2801-
{
2802-
vec[sizeof(NumberType) - i - 1] = static_cast<std::uint8_t>(current);
2803-
}
2804-
else
2805-
{
2806-
vec[i] = static_cast<std::uint8_t>(current); // LCOV_EXCL_LINE
2807-
}
2839+
if (JSON_HEDLEY_UNLIKELY(!get_to(result, format, "number")))
2840+
{
2841+
return false;
2842+
}
2843+
if (is_little_endian != (InputIsLittleEndian || format == input_format_t::bjdata))
2844+
{
2845+
byte_swap(result);
28082846
}
2809-
2810-
// step 2: convert array into number of type T and return
2811-
std::memcpy(&result, vec.data(), sizeof(NumberType));
28122847
return true;
28132848
}
28142849

include/nlohmann/detail/input/input_adapters.hpp

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,13 @@ class file_input_adapter
6767
return std::fgetc(m_file);
6868
}
6969

70+
// returns the number of characters successfully read
71+
template<class T>
72+
std::size_t get_elements(T* dest, std::size_t count = 1)
73+
{
74+
return fread(dest, 1, sizeof(T) * count, m_file);
75+
}
76+
7077
private:
7178
/// the file pointer to read from
7279
std::FILE* m_file;
@@ -126,6 +133,17 @@ class input_stream_adapter
126133
return res;
127134
}
128135

136+
template<class T>
137+
std::size_t get_elements(T* dest, std::size_t count = 1)
138+
{
139+
auto res = sb->sgetn(reinterpret_cast<char*>(dest), count * sizeof(T));
140+
if (JSON_HEDLEY_UNLIKELY(res < count * sizeof(T)))
141+
{
142+
is->clear(is->rdstate() | std::ios::eofbit);
143+
}
144+
return static_cast<std::size_t>(res);
145+
}
146+
129147
private:
130148
/// the associated input stream
131149
std::istream* is = nullptr;
@@ -157,6 +175,26 @@ class iterator_input_adapter
157175
return char_traits<char_type>::eof();
158176
}
159177

178+
// for general iterators, we cannot really do something better than falling back to processing the range one-by-one
179+
template<class T>
180+
std::size_t get_elements(T* dest, std::size_t count = 1)
181+
{
182+
auto ptr = reinterpret_cast<char*>(dest);
183+
for (std::size_t read_index = 0; read_index < count * sizeof(T); ++read_index)
184+
{
185+
if (JSON_HEDLEY_LIKELY(current != end))
186+
{
187+
ptr[read_index] = *current;
188+
std::advance(current, 1);
189+
}
190+
else
191+
{
192+
return read_index;
193+
}
194+
}
195+
return count * sizeof(T);
196+
}
197+
160198
private:
161199
IteratorType current;
162200
IteratorType end;
@@ -320,6 +358,13 @@ class wide_string_input_adapter
320358
return utf8_bytes[utf8_bytes_index++];
321359
}
322360

361+
template<class T>
362+
std::size_t get_elements(T*, std::size_t = 1)
363+
{
364+
JSON_THROW(other_error::create(500, "Unexpected get_elements call to wchar input adapter", nullptr));
365+
return 0;
366+
}
367+
323368
private:
324369
BaseInputAdapter base_adapter;
325370

single_include/nlohmann/json.hpp

Lines changed: 101 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6133,6 +6133,9 @@ NLOHMANN_JSON_NAMESPACE_END
61336133
#include <string> // char_traits, string
61346134
#include <utility> // make_pair, move
61356135
#include <vector> // vector
6136+
#ifdef __cpp_lib_byteswap
6137+
#include <bit> //byteswap
6138+
#endif
61366139

61376140
// #include <nlohmann/detail/exceptions.hpp>
61386141

@@ -6209,6 +6212,13 @@ class file_input_adapter
62096212
return std::fgetc(m_file);
62106213
}
62116214

6215+
// returns the number of characters successfully read
6216+
template<class T>
6217+
std::size_t get_elements(T* dest, std::size_t count = 1)
6218+
{
6219+
return fread(dest, 1, sizeof(T) * count, m_file);
6220+
}
6221+
62126222
private:
62136223
/// the file pointer to read from
62146224
std::FILE* m_file;
@@ -6268,6 +6278,17 @@ class input_stream_adapter
62686278
return res;
62696279
}
62706280

6281+
template<class T>
6282+
std::size_t get_elements(T* dest, std::size_t count = 1)
6283+
{
6284+
auto res = sb->sgetn(reinterpret_cast<char*>(dest), count * sizeof(T));
6285+
if (JSON_HEDLEY_UNLIKELY(res < count * sizeof(T)))
6286+
{
6287+
is->clear(is->rdstate() | std::ios::eofbit);
6288+
}
6289+
return static_cast<std::size_t>(res);
6290+
}
6291+
62716292
private:
62726293
/// the associated input stream
62736294
std::istream* is = nullptr;
@@ -6299,6 +6320,26 @@ class iterator_input_adapter
62996320
return char_traits<char_type>::eof();
63006321
}
63016322

6323+
// for general iterators, we cannot really do something better than falling back to processing the range one-by-one
6324+
template<class T>
6325+
std::size_t get_elements(T* dest, std::size_t count = 1)
6326+
{
6327+
auto ptr = reinterpret_cast<char*>(dest);
6328+
for (std::size_t read_index = 0; read_index < count * sizeof(T); ++read_index)
6329+
{
6330+
if (JSON_HEDLEY_LIKELY(current != end))
6331+
{
6332+
ptr[read_index] = *current;
6333+
std::advance(current, 1);
6334+
}
6335+
else
6336+
{
6337+
return read_index;
6338+
}
6339+
}
6340+
return count * sizeof(T);
6341+
}
6342+
63026343
private:
63036344
IteratorType current;
63046345
IteratorType end;
@@ -6462,6 +6503,13 @@ class wide_string_input_adapter
64626503
return utf8_bytes[utf8_bytes_index++];
64636504
}
64646505

6506+
template<class T>
6507+
std::size_t get_elements(T* dest, std::size_t count = 1)
6508+
{
6509+
JSON_THROW(other_error::create(500, "Unexpected get_elements call to wchar input adapter", nullptr));
6510+
return 0;
6511+
}
6512+
64656513
private:
64666514
BaseInputAdapter base_adapter;
64676515

@@ -11900,6 +11948,29 @@ class binary_reader
1190011948
return current = ia.get_character();
1190111949
}
1190211950

11951+
/*!
11952+
@brief get_to read into a primitive type
11953+
11954+
This function provides the interface to the used input adapter. It does
11955+
not throw in case the input reached EOF, but returns false instead
11956+
11957+
@return bool, whether the read was successful
11958+
*/
11959+
template<class T>
11960+
bool get_to(T& dest, const input_format_t format, const char* context)
11961+
{
11962+
auto new_chars_read = ia.get_elements(&dest);
11963+
chars_read += new_chars_read;
11964+
if (JSON_HEDLEY_UNLIKELY(new_chars_read < sizeof(T)))
11965+
{
11966+
// in case of failure, advance position by 1 to report failing location
11967+
++chars_read;
11968+
sax->parse_error(chars_read, "<end of file>", parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context), nullptr));
11969+
return false;
11970+
}
11971+
return true;
11972+
}
11973+
1190311974
/*!
1190411975
@return character read from the input after ignoring all 'N' entries
1190511976
*/
@@ -11914,6 +11985,28 @@ class binary_reader
1191411985
return current;
1191511986
}
1191611987

11988+
/*!
@brief reverse the byte order of a number in place

Multi-byte integral types are swapped with `std::byteswap` when the standard
library provides it. Every other type (floating-point types in particular) is
reversed byte by byte, because `std::byteswap` only accepts integer types and
accessing a float through a reference to an unrelated integer type would
violate the aliasing rules. Single-byte types are left unchanged.

@tparam NumberType  type of the number; it is treated as a plain sequence of bytes

@param[in,out] number  value whose bytes are reversed
*/
template<class NumberType>
static void byte_swap(NumberType& number)
{
    constexpr std::size_t sz = sizeof(number);
#ifdef __cpp_lib_byteswap
    if constexpr (std::is_integral<NumberType>::value && sz > 1)
    {
        number = std::byteswap(number);
    }
    else
#endif
    {
        // byte-wise access is valid for any object type; for sz == 1 the loop does not run
        auto* bytes = reinterpret_cast<std::uint8_t*>(&number);
        for (std::size_t i = 0; i < sz / 2; ++i)
        {
            std::swap(bytes[i], bytes[sz - i - 1]);
        }
    }
}
12009+
1191712010
/*
1191812011
@brief read a number from the input
1191912012

@@ -11932,29 +12025,16 @@ class binary_reader
1193212025
template<typename NumberType, bool InputIsLittleEndian = false>
1193312026
bool get_number(const input_format_t format, NumberType& result)
1193412027
{
11935-
// step 1: read input into array with system's byte order
11936-
std::array<std::uint8_t, sizeof(NumberType)> vec{};
11937-
for (std::size_t i = 0; i < sizeof(NumberType); ++i)
11938-
{
11939-
get();
11940-
if (JSON_HEDLEY_UNLIKELY(!unexpect_eof(format, "number")))
11941-
{
11942-
return false;
11943-
}
12028+
// read in the original format
1194412029

11945-
// reverse byte order prior to conversion if necessary
11946-
if (is_little_endian != (InputIsLittleEndian || format == input_format_t::bjdata))
11947-
{
11948-
vec[sizeof(NumberType) - i - 1] = static_cast<std::uint8_t>(current);
11949-
}
11950-
else
11951-
{
11952-
vec[i] = static_cast<std::uint8_t>(current); // LCOV_EXCL_LINE
11953-
}
12030+
if (JSON_HEDLEY_UNLIKELY(!get_to(result, format, "number")))
12031+
{
12032+
return false;
12033+
}
12034+
if (is_little_endian != (InputIsLittleEndian || format == input_format_t::bjdata))
12035+
{
12036+
byte_swap(result);
1195412037
}
11955-
11956-
// step 2: convert array into number of type T and return
11957-
std::memcpy(&result, vec.data(), sizeof(NumberType));
1195812038
return true;
1195912039
}
1196012040

0 commit comments

Comments
 (0)