@@ -1311,7 +1311,13 @@ class utf8_to_utf16 {
13111311 inline auto str () const -> std::wstring { return {&buffer_[0 ], size ()}; }
13121312};
13131313
1314- enum class to_utf8_error_policy { abort, replace };
// Selects what the UTF-16 -> UTF-8 conversion does when it meets a code unit
// that is not part of a well-formed surrogate pair.
enum class to_utf8_error_policy {
  // Stop converting and report failure (the conversion returns false).
  abort,
  // Emit the byte sequence \xEF\xBF\xBD in place of the offending code unit.
  replace,
  // Emit the offending code unit itself as a three-byte sequence via
  // to_utf8_3bytes (presumably named after the WTF-8 encoding).
  wtf
};
1315+
1316+ inline void to_utf8_3bytes (buffer<char >& buf, uint32_t cp) {
1317+ buf.push_back (static_cast <char >(0xe0 | (cp >> 12 )));
1318+ buf.push_back (static_cast <char >(0x80 | ((cp & 0xfff ) >> 6 )));
1319+ buf.push_back (static_cast <char >(0x80 | (cp & 0x3f )));
1320+ }
13151321
13161322// A converter from UTF-16/UTF-32 (host endian) to UTF-8.
13171323template <typename WChar, typename Buffer = memory_buffer> class to_utf8 {
@@ -1353,8 +1359,16 @@ template <typename WChar, typename Buffer = memory_buffer> class to_utf8 {
13531359 // Handle a surrogate pair.
13541360 ++p;
13551361 if (p == s.end () || (c & 0xfc00 ) != 0xd800 || (*p & 0xfc00 ) != 0xdc00 ) {
1356- if (policy == to_utf8_error_policy::abort) return false ;
1357- buf.append (string_view (" \xEF\xBF\xBD " ));
1362+ switch (policy) {
1363+ case to_utf8_error_policy::abort:
1364+ return false ;
1365+ case to_utf8_error_policy::replace:
1366+ buf.append (string_view (" \xEF\xBF\xBD " ));
1367+ break ;
1368+ case to_utf8_error_policy::wtf:
1369+ to_utf8_3bytes (buf, c);
1370+ break ;
1371+ }
13581372 --p;
13591373 continue ;
13601374 }
@@ -1366,9 +1380,7 @@ template <typename WChar, typename Buffer = memory_buffer> class to_utf8 {
13661380 buf.push_back (static_cast <char >(0xc0 | (c >> 6 )));
13671381 buf.push_back (static_cast <char >(0x80 | (c & 0x3f )));
13681382 } else if ((c >= 0x800 && c <= 0xd7ff ) || (c >= 0xe000 && c <= 0xffff )) {
1369- buf.push_back (static_cast <char >(0xe0 | (c >> 12 )));
1370- buf.push_back (static_cast <char >(0x80 | ((c & 0xfff ) >> 6 )));
1371- buf.push_back (static_cast <char >(0x80 | (c & 0x3f )));
1383+ to_utf8_3bytes (buf, c);
13721384 } else if (c >= 0x10000 && c <= 0x10ffff ) {
13731385 buf.push_back (static_cast <char >(0xf0 | (c >> 18 )));
13741386 buf.push_back (static_cast <char >(0x80 | ((c & 0x3ffff ) >> 12 )));
0 commit comments