-
Notifications
You must be signed in to change notification settings - Fork 9k
Fix for UTF-8 partials in function ConhostConnection::_OutputThread.
#1850
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 12 commits
ea46579
7c83c38
7a4a814
a424a16
85b5509
5780770
d0e6e82
d98c589
7df6609
507f526
7d8c4b1
f08f31c
bb299d7
c395f3b
23c4286
99d0713
c877edd
e35a78f
31db29b
d09ac87
f23bcd9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,7 +4,11 @@ | |
| #include "pch.h" | ||
| #include "ConhostConnection.h" | ||
| #include "windows.h" | ||
| #include "wil/common.h" | ||
| #include <sstream> | ||
| #include <string_view> | ||
| #include <algorithm> | ||
| #include <type_traits> | ||
| // STARTF_USESTDHANDLES is only defined in WINAPI_PARTITION_DESKTOP | ||
| // We're just gonna manually define it for this prototyping code | ||
| #ifndef STARTF_USESTDHANDLES | ||
|
|
@@ -190,38 +194,75 @@ namespace winrt::Microsoft::Terminal::TerminalConnection::implementation | |
| DWORD ConhostConnection::_OutputThread() | ||
| { | ||
| const size_t bufferSize = 4096; | ||
| BYTE buffer[bufferSize]; | ||
| DWORD dwRead; | ||
| BYTE buffer[bufferSize]{ 0 }; | ||
| BYTE utf8Partials[4]{ 0 }; // buffer for code units of a partial UTF-8 code point that have to be cached | ||
| DWORD dwRead{}; // length of a chunk read | ||
| DWORD dwPartialsLen{}; // number of cached UTF-8 code units | ||
| bool fSuccess{}; | ||
| const BYTE bitmasks[]{ 0, 0b11000000, 0b11100000, 0b11110000 }; // for comparisons after the Lead Byte was found | ||
|
|
||
| // process the data of the output pipe in a loop | ||
| while (true) | ||
| { | ||
| dwRead = 0; | ||
| bool fSuccess = false; | ||
| // copy UTF-8 code units that were remaining from the previously read chunk (if any) | ||
| if (dwPartialsLen != 0) | ||
| { | ||
| std::move(utf8Partials, utf8Partials + dwPartialsLen, buffer); | ||
| } | ||
|
|
||
| // try to read data | ||
| fSuccess = !!ReadFile(_outPipe.get(), &buffer[dwPartialsLen], std::extent<decltype(buffer)>::value - dwPartialsLen, &dwRead, nullptr); | ||
|
|
||
| dwRead += dwPartialsLen; | ||
| dwPartialsLen = 0; | ||
|
|
||
| fSuccess = !!ReadFile(_outPipe.get(), buffer, bufferSize, &dwRead, nullptr); | ||
| if (!fSuccess) | ||
| if (dwRead == 0) // quit if no data has been read and no cached data was left over | ||
| { | ||
| return 0; | ||
| } | ||
| else if (!fSuccess) | ||
| { | ||
| if (_closing.load()) | ||
| { | ||
| // This is okay, break out to kill the thread | ||
| return 0; | ||
| } | ||
| else | ||
| { | ||
| _disconnectHandlers(); | ||
| return (DWORD)-1; | ||
| } | ||
|
|
||
| _disconnectHandlers(); | ||
| return (DWORD)-1; | ||
| } | ||
| if (dwRead == 0) | ||
|
|
||
| const BYTE* const endPtr{ buffer + dwRead }; | ||
| const BYTE* backIter{ endPtr - 1 }; | ||
| // If the last byte in the buffer was a byte belonging to a UTF-8 multi-byte character | ||
| if (WI_AreAllFlagsSet(*backIter, 0b10000000)) | ||
| { | ||
| continue; | ||
| // Check only up to 3 last bytes, if no Lead Byte was found then the byte before must be the Lead Byte and no partials are in the buffer | ||
| for (DWORD dwSequenceLen{ 1UL }, stop{ dwRead < 4UL ? dwRead : 4UL }; dwSequenceLen < stop; ++dwSequenceLen, --backIter) | ||
| { | ||
| // If Lead Byte found | ||
| if (WI_AreAllFlagsSet(*backIter, 0b11000000)) | ||
| { | ||
| // If the Lead Byte indicates that the last bytes in the buffer is a partial UTF-8 code point then cache them | ||
| if (WI_IsAnyFlagClear(*backIter & bitmasks[dwSequenceLen], bitmasks[dwSequenceLen - 1])) | ||
|
||
| { | ||
| std::move(backIter, endPtr, utf8Partials); | ||
| dwRead -= dwSequenceLen; | ||
| dwPartialsLen = dwSequenceLen; | ||
| } | ||
|
|
||
| break; | ||
| } | ||
| } | ||
| } | ||
| // Convert buffer to hstring | ||
| char* pchStr = (char*)(buffer); | ||
| std::string str{ pchStr, dwRead }; | ||
| auto hstr = winrt::to_hstring(str); | ||
| const std::string_view strView{ reinterpret_cast<char*>(buffer), dwRead }; | ||
| auto hstr{ winrt::to_hstring(strView) }; | ||
|
|
||
| // Pass the output to our registered event handlers | ||
| _outputHandlers(hstr); | ||
| } | ||
|
|
||
| return 0; | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we already have wil/common, string_view, algorithm and type_traits in our precompiled header through
pch.h->LibraryIncludes.h!There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm used to including the related headers. It's difficult for me to find the way across other header files to figure out which have been already included over detours.
Before I push the next commit I should probably check if it compiles without string_view, algorithm, and type_traits included directly.