Skip to content

Commit 1122268

Browse files
committed
Make path formatting lossless with WTF-8
1 parent a4c7e17 commit 1122268

File tree

3 files changed

+29
-16
lines changed

3 files changed

+29
-16
lines changed

include/fmt/format.h

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1311,7 +1311,13 @@ class utf8_to_utf16 {
13111311
inline auto str() const -> std::wstring { return {&buffer_[0], size()}; }
13121312
};
13131313

1314-
enum class to_utf8_error_policy { abort, replace };
1314+
enum class to_utf8_error_policy { abort, replace, wtf };
1315+
1316+
// Appends `cp` to `buf` as a three-byte UTF-8 sequence of the form
// 1110xxxx 10xxxxxx 10xxxxxx. The value is encoded as-is with no range or
// surrogate check, which is what lets the `wtf` error-policy branch emit an
// unpaired surrogate losslessly instead of the U+FFFD replacement.
// NOTE(review): presumably callers guarantee cp <= 0xffff -- confirm; a larger
// value would spill into the lead byte's 0xe0 prefix bits.
inline void to_utf8_3bytes(buffer<char>& buf, uint32_t cp) {
1317+
// Lead byte: 0xe0 marker combined with the bits from position 12 upward.
buf.push_back(static_cast<char>(0xe0 | (cp >> 12)));
1318+
// First continuation byte: 0x80 marker combined with bits 6..11.
buf.push_back(static_cast<char>(0x80 | ((cp & 0xfff) >> 6)));
1319+
// Second continuation byte: 0x80 marker combined with bits 0..5.
buf.push_back(static_cast<char>(0x80 | (cp & 0x3f)));
1320+
}
13151321

13161322
// A converter from UTF-16/UTF-32 (host endian) to UTF-8.
13171323
template <typename WChar, typename Buffer = memory_buffer> class to_utf8 {
@@ -1353,8 +1359,16 @@ template <typename WChar, typename Buffer = memory_buffer> class to_utf8 {
13531359
// Handle a surrogate pair.
13541360
++p;
13551361
if (p == s.end() || (c & 0xfc00) != 0xd800 || (*p & 0xfc00) != 0xdc00) {
1356-
if (policy == to_utf8_error_policy::abort) return false;
1357-
buf.append(string_view("\xEF\xBF\xBD"));
1362+
switch (policy) {
1363+
case to_utf8_error_policy::abort:
1364+
return false;
1365+
case to_utf8_error_policy::replace:
1366+
buf.append(string_view("\xEF\xBF\xBD"));
1367+
break;
1368+
case to_utf8_error_policy::wtf:
1369+
to_utf8_3bytes(buf, c);
1370+
break;
1371+
}
13581372
--p;
13591373
continue;
13601374
}
@@ -1366,9 +1380,7 @@ template <typename WChar, typename Buffer = memory_buffer> class to_utf8 {
13661380
buf.push_back(static_cast<char>(0xc0 | (c >> 6)));
13671381
buf.push_back(static_cast<char>(0x80 | (c & 0x3f)));
13681382
} else if ((c >= 0x800 && c <= 0xd7ff) || (c >= 0xe000 && c <= 0xffff)) {
1369-
buf.push_back(static_cast<char>(0xe0 | (c >> 12)));
1370-
buf.push_back(static_cast<char>(0x80 | ((c & 0xfff) >> 6)));
1371-
buf.push_back(static_cast<char>(0x80 | (c & 0x3f)));
1383+
to_utf8_3bytes(buf, c);
13721384
} else if (c >= 0x10000 && c <= 0x10ffff) {
13731385
buf.push_back(static_cast<char>(0xf0 | (c >> 18)));
13741386
buf.push_back(static_cast<char>(0x80 | ((c & 0x3ffff) >> 12)));

include/fmt/std.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -84,10 +84,12 @@ namespace detail {
8484
template <typename Char, typename PathChar>
8585
auto get_path_string(const std::filesystem::path& p,
8686
const std::basic_string<PathChar>& native) {
87-
if constexpr (std::is_same_v<Char, char> && std::is_same_v<PathChar, wchar_t>)
88-
return to_utf8<wchar_t>(native, to_utf8_error_policy::replace);
89-
else
87+
if constexpr (std::is_same_v<Char, char> &&
88+
std::is_same_v<PathChar, wchar_t>) {
89+
return to_utf8<wchar_t>(native, to_utf8_error_policy::wtf);
90+
} else {
9091
return p.string<Char>();
92+
}
9193
}
9294

9395
template <typename Char, typename PathChar>

test/std-test.cc

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,13 +39,12 @@ TEST(std_test, path) {
3939
EXPECT_EQ(fmt::format("{}", path(L"\x0428\x0447\x0443\x0447\x044B\x043D\x0448"
4040
L"\x0447\x044B\x043D\x0430")),
4141
"Шчучыншчына");
42-
EXPECT_EQ(fmt::format("{}", path(L"\xd800")), "");
43-
EXPECT_EQ(fmt::format("{}", path(L"HEAD \xd800 TAIL")), "HEAD � TAIL");
44-
EXPECT_EQ(fmt::format("{}", path(L"HEAD \xD83D\xDE00 TAIL")),
45-
"HEAD \xF0\x9F\x98\x80 TAIL");
46-
EXPECT_EQ(fmt::format("{}", path(L"HEAD \xD83D\xD83D\xDE00 TAIL")),
47-
"HEAD �\xF0\x9F\x98\x80 TAIL");
48-
EXPECT_EQ(fmt::format("{:?}", path(L"\xd800")), "\"\\ud800\"");
42+
EXPECT_EQ(fmt::format("{}", path(L"\xD800")), "\xED\xA0\x80");
43+
EXPECT_EQ(fmt::format("{}", path(L"[\xD800]")), "[\xED\xA0\x80]");
44+
EXPECT_EQ(fmt::format("{}", path(L"[\xD83D\xDE00]")), "[\xF0\x9F\x98\x80]");
45+
EXPECT_EQ(fmt::format("{}", path(L"[\xD83D\xD83D\xDE00]")),
46+
"[\xED\xA0\xBD\xF0\x9F\x98\x80]");
47+
EXPECT_EQ(fmt::format("{:?}", path(L"\xD800")), "\"\\ud800\"");
4948
# endif
5049
}
5150

0 commit comments

Comments
 (0)