Skip to content
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
ea46579
Cache UTF-8 partials of ConhostConnection output pipe
german-one Jun 30, 2019
7c83c38
Revert "Cache UTF-8 partials of ConhostConnection output pipe"
german-one Jul 6, 2019
7a4a814
Fix for UTF-8 partials in functions `ConhostConnection::_OutputThread…
german-one Jul 6, 2019
a424a16
Revert "Fix for UTF-8 partials in functions `ConhostConnection::_Outp…
german-one Jul 6, 2019
85b5509
Fix for UTF-8 partials in function `ConhostConnection::_OutputThread`
german-one Jul 6, 2019
5780770
Fix for UTF-8 partials in function `ConhostConnection::_OutputThread`
german-one Jul 6, 2019
d0e6e82
Fix for UTF-8 partials in function `ConhostConnection::_OutputThread`
german-one Jul 6, 2019
d98c589
Fix for UTF-8 partials in function `ConhostConnection::_OutputThread`
german-one Jul 7, 2019
7df6609
Fix for UTF-8 partials in function `ApiRoutines::WriteConsoleOutputCh…
german-one Jul 7, 2019
507f526
Fix for UTF-8 partials in function `ConhostConnection::_OutputThread`
german-one Jul 7, 2019
7d8c4b1
Fix for UTF-8 partials in function `ConhostConnection::_OutputThread`
german-one Jul 9, 2019
f08f31c
Fix for UTF-8 partials in function `ConhostConnection::_OutputThread`
german-one Jul 9, 2019
bb299d7
Fix for UTF-8 partials in function `ConhostConnection::_OutputThread`
german-one Jul 9, 2019
c395f3b
Fix for UTF-8 partials in function `ConhostConnection::_OutputThread`
german-one Jul 9, 2019
23c4286
Utf8OutPipeReader class added
german-one Jul 11, 2019
99d0713
Utf8OutPipeReader class added
german-one Jul 12, 2019
c877edd
Utf8OutPipeReader class added
german-one Jul 12, 2019
e35a78f
Utf8OutPipeReader class added
german-one Jul 12, 2019
31db29b
Unit Test added
german-one Jul 14, 2019
d09ac87
use specific macros and WIL classes
german-one Jul 15, 2019
f23bcd9
avoid possible deadlock caused by unclosed pipe handle
german-one Jul 15, 2019
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 16 additions & 18 deletions src/cascadia/TerminalConnection/ConhostConnection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

#include <conpty-universal.h>
#include "../../types/inc/Utils.hpp"
#include "../../types/inc/UTF8OutPipeReader.hpp"

using namespace ::Microsoft::Console;

Expand Down Expand Up @@ -189,39 +190,36 @@ namespace winrt::Microsoft::Terminal::TerminalConnection::implementation

// Thread routine that drains the connection's output pipe: each chunk that is read is converted with
// winrt::to_hstring and passed to the registered output handlers.
// Returns 0 when a read fails while `_closing` is set, or when an empty view is handed back by the reader;
// otherwise a failed read invokes the disconnect handlers and returns (DWORD)-1.
// NOTE(review): this listing looks like a flattened diff - statements of the previous implementation (raw
// ReadFile into a local buffer) and of the new one (UTF8OutPipeReader) appear interleaved, so it does not
// compile as shown. Confirm against the actual file before relying on the exact statement order.
DWORD ConhostConnection::_OutputThread()
{
const size_t bufferSize = 4096;
BYTE buffer[bufferSize];
DWORD dwRead;
// NOTE(review): `static` means a single reader object is shared by every call of this member function, and
// UTF8OutPipeReader stores a reference to the handle wrapper it was constructed with. If `_outPipe` is a
// per-connection member (presumably - confirm), a second connection's thread would read from the first
// connection's pipe. Consider dropping `static`.
static UTF8OutPipeReader pipeReader{ _outPipe };
std::string_view strView{};

// process the data of the output pipe in a loop
while (true)
{
dwRead = 0;
bool fSuccess = false;

fSuccess = !!ReadFile(_outPipe.get(), buffer, bufferSize, &dwRead, nullptr);
if (!fSuccess)
HRESULT result = pipeReader.Read(strView);
if (FAILED(result))
{
if (_closing.load())
{
// This is okay, break out to kill the thread
return 0;
}
else
{
_disconnectHandlers();
return (DWORD)-1;
}

_disconnectHandlers();
return (DWORD)-1;
}
if (dwRead == 0)
else if (strView.empty())
{
continue;
return 0;
}

// Convert buffer to hstring
char* pchStr = (char*)(buffer);
std::string str{ pchStr, dwRead };
auto hstr = winrt::to_hstring(str);
auto hstr{ winrt::to_hstring(strView) };

// Pass the output to our registered event handlers
_outputHandlers(hstr);
}

return 0;
}
}
74 changes: 74 additions & 0 deletions src/types/UTF8OutPipeReader.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.

#include "precomp.h"
#include "inc/Utf8OutPipeReader.hpp"
#include <type_traits>
#include <utility>

// Binds the reader to an existing pipe handle wrapper.
// Only a reference to `outPipe` is stored, so the wrapper has to outlive this reader.
UTF8OutPipeReader::UTF8OutPipeReader(wil::unique_hfile& outPipe) :
    _outPipe(outPipe)
{
}

// Reads the next chunk from the pipe and gives back a view of the bytes that form complete UTF-8 code points.
// Code units of a code point that is cut off at the end of the chunk are cached and prepended to the next
// chunk that is read.
//
// Arguments:
// - strView: receives a view into the internal buffer. The buffer is reused by the next call of Read, so the
//   view must be consumed before Read is called again. It is empty if no data was read or if reading failed.
// Return value:
// - S_OK if data was read, or if nothing was read and nothing was cached (empty view).
// - A failure HRESULT (-1) if ReadFile failed while data was expected.
[[nodiscard]] HRESULT UTF8OutPipeReader::Read(_Out_ std::string_view& strView)
{
    // in case of early escaping
    *_buffer = 0;
    strView = std::string_view{ reinterpret_cast<char*>(_buffer), 0 };

    // Keep reading until at least one complete code point can be handed back. A chunk may consist solely of
    // the beginning of a multi-byte code point (e.g. a writer that emits single bytes). Returning an empty
    // view for such a chunk would be indistinguishable from "no data has been read".
    while (true)
    {
        // copy UTF-8 code units that were remaining from the previously read chunk (if any)
        if (_dwPartialsLen != 0)
        {
            std::move(_utf8Partials, _utf8Partials + _dwPartialsLen, _buffer);
        }

        // try to read data behind the code units that were carried over
        DWORD dwNewlyRead{};
        const DWORD dwCapacity{ static_cast<DWORD>(std::extent<decltype(_buffer)>::value) - _dwPartialsLen };
        const bool fSuccess{ !!ReadFile(_outPipe.get(), &_buffer[_dwPartialsLen], dwCapacity, &dwNewlyRead, nullptr) };

        DWORD dwRead{ dwNewlyRead + _dwPartialsLen };
        _dwPartialsLen = 0;

        if (dwRead == 0) // quit if no data has been read and no cached data was left over
        {
            return S_OK;
        }
        else if (!fSuccess) // reading failed
        {
            return static_cast<HRESULT>(-1);
        }

        // If no new bytes arrived, the cached code units will never be completed by this read. Hand them
        // back unchanged instead of caching them again, otherwise this loop would never terminate.
        if (dwNewlyRead != 0)
        {
            const BYTE* const endPtr{ _buffer + dwRead };

            // If the last byte in the buffer was a byte belonging to a UTF-8 multi-byte character
            if ((*(endPtr - 1) & _Utf8BitMasks::MaskAsciiByte) > _Utf8BitMasks::IsAsciiByte)
            {
                // Check up to the 3 last bytes, but never more bytes than the buffer contains. If no Lead
                // Byte is found among them then the byte before must be the Lead Byte of a complete 4-byte
                // sequence and no partials are in the buffer.
                const DWORD dwMaxSequenceLen{ dwRead < 3UL ? dwRead : 3UL };
                for (DWORD dwSequenceLen{ 1UL }; dwSequenceLen <= dwMaxSequenceLen; ++dwSequenceLen)
                {
                    // Index from the end so that no pointer in front of the buffer is ever formed.
                    const BYTE* const candidate{ endPtr - dwSequenceLen };

                    // If Lead Byte found
                    if ((*candidate & _Utf8BitMasks::MaskContinuationByte) > _Utf8BitMasks::IsContinuationByte)
                    {
                        // Use the bitmask at index `dwSequenceLen` and compare the result with the operand
                        // having the same index. If they are not equal then the Lead Byte announces more
                        // bytes than are present, so the sequence is a partial code point and has to be
                        // cached. Otherwise the sequence is complete and the whole buffer is ready.
                        if ((*candidate & _cmpMasks[dwSequenceLen]) != _cmpOperands[dwSequenceLen])
                        {
                            std::move(candidate, endPtr, _utf8Partials);
                            dwRead -= dwSequenceLen;
                            _dwPartialsLen = dwSequenceLen;
                        }

                        break;
                    }
                }
            }
        }

        if (dwRead != 0)
        {
            // give back a view of the part of the buffer that contains complete code points only
            strView = std::string_view{ reinterpret_cast<char*>(_buffer), dwRead };
            return S_OK;
        }

        // Everything read so far is the beginning of a single code point: read again to complete it.
    }
}
69 changes: 69 additions & 0 deletions src/types/inc/UTF8OutPipeReader.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*++
Copyright (c) Microsoft Corporation
Licensed under the MIT license.

Module Name:
- UTF8OutPipeReader.hpp

Abstract:
- This reads a UTF-8 stream and gives back a buffer that contains complete code points only
- Partial UTF-8 code points at the end of the buffer read are cached and prepended to the next chunk read

Author(s):
- Steffen Illhardt (german-one) 12-July-2019
--*/

#pragma once

#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif

#include <windows.h>
#include <wil\common.h>
#include <wil\resource.h>
#include <string_view>

// Reads chunks of a UTF-8 stream from a pipe and gives back only complete code points.
// Code units of a code point that is cut off at the end of a chunk are cached and prepended to the next chunk.
class UTF8OutPipeReader final
{
public:
    // Binds the reader to `outPipe`. Only a reference is stored, so the handle wrapper has to outlive the reader.
    // `explicit` prevents a pipe handle from being converted into a reader implicitly.
    explicit UTF8OutPipeReader(wil::unique_hfile& outPipe);

    // Reads the next chunk. `strView` receives a view into the internal buffer, which is reused by the next call.
    [[nodiscard]] HRESULT Read(_Out_ std::string_view& strView);

private:
    wil::unique_hfile& _outPipe; // pipe to read from (not owned)

    enum _Utf8BitMasks : BYTE
    {
        IsAsciiByte = 0b0'0000000, // Any byte representing an ASCII character has the MSB set to 0
        MaskAsciiByte = 0b1'0000000, // Bit mask to be used in a bitwise AND operation to find out whether or not a byte matches the IsAsciiByte pattern
        IsContinuationByte = 0b10'000000, // Continuation bytes of any UTF-8 non-ASCII character have the MSB set to 1 and the adjacent bit set to 0
        MaskContinuationByte = 0b11'000000, // Bit mask to be used in a bitwise AND operation to find out whether or not a byte matches the IsContinuationByte pattern
        IsLeadByteTwoByteSequence = 0b110'00000, // A lead byte that indicates a UTF-8 non-ASCII character consisting of two bytes has the two highest bits set to 1 and the adjacent bit set to 0
        MaskLeadByteTwoByteSequence = 0b111'00000, // Bit mask to be used in a bitwise AND operation to find out whether or not a lead byte matches the IsLeadByteTwoByteSequence pattern
        IsLeadByteThreeByteSequence = 0b1110'0000, // A lead byte that indicates a UTF-8 non-ASCII character consisting of three bytes has the three highest bits set to 1 and the adjacent bit set to 0
        MaskLeadByteThreeByteSequence = 0b1111'0000, // Bit mask to be used in a bitwise AND operation to find out whether or not a lead byte matches the IsLeadByteThreeByteSequence pattern
        IsLeadByteFourByteSequence = 0b11110'000, // A lead byte that indicates a UTF-8 non-ASCII character consisting of four bytes has the four highest bits set to 1 and the adjacent bit set to 0
        MaskLeadByteFourByteSequence = 0b11111'000 // Bit mask to be used in a bitwise AND operation to find out whether or not a lead byte matches the IsLeadByteFourByteSequence pattern
    };

    // Bitmasks applied to a lead byte, indexed by the number of bytes of its sequence that are in the buffer.
    static constexpr BYTE _cmpMasks[]{
        0, // unused
        _Utf8BitMasks::MaskContinuationByte,
        _Utf8BitMasks::MaskLeadByteTwoByteSequence,
        _Utf8BitMasks::MaskLeadByteThreeByteSequence,
    };

    // Values the masked lead byte is compared with (same index as in _cmpMasks).
    // Equal means the sequence is complete, unequal means it is a partial code point.
    static constexpr BYTE _cmpOperands[]{
        0, // unused
        _Utf8BitMasks::IsAsciiByte, // intentionally conflicts with MaskContinuationByte: a lone lead byte is always a partial
        _Utf8BitMasks::IsLeadByteTwoByteSequence,
        _Utf8BitMasks::IsLeadByteThreeByteSequence,
    };

    BYTE _buffer[4096]{ 0 }; // buffer for the chunk read
    BYTE _utf8Partials[4]{ 0 }; // buffer for code units of a partial UTF-8 code point that have to be cached
    DWORD _dwPartialsLen{}; // number of cached UTF-8 code units
};
2 changes: 2 additions & 0 deletions src/types/lib/types.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
<ClCompile Include="..\MenuEvent.cpp" />
<ClCompile Include="..\ModifierKeyState.cpp" />
<ClCompile Include="..\Utf16Parser.cpp" />
<ClCompile Include="..\UTF8OutPipeReader.cpp" />
<ClCompile Include="..\Viewport.cpp" />
<ClCompile Include="..\WindowBufferSizeEvent.cpp" />
<ClCompile Include="..\precomp.cpp">
Expand All @@ -24,6 +25,7 @@
<ClInclude Include="..\inc\convert.hpp" />
<ClInclude Include="..\inc\GlyphWidth.hpp" />
<ClInclude Include="..\inc\IInputEvent.hpp" />
<ClInclude Include="..\inc\UTF8OutPipeReader.hpp" />
<ClInclude Include="..\inc\Viewport.hpp" />
<ClInclude Include="..\inc\Utf16Parser.hpp" />
<ClInclude Include="..\precomp.h" />
Expand Down
6 changes: 6 additions & 0 deletions src/types/lib/types.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@
<ClCompile Include="..\utils.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="..\UTF8OutPipeReader.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\inc\IInputEvent.hpp">
Expand All @@ -83,6 +86,9 @@
<ClInclude Include="..\utils.hpp">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="..\inc\UTF8OutPipeReader.hpp">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Natvis Include="$(SolutionDir)tools\ConsoleTypes.natvis" />
Expand Down
1 change: 1 addition & 0 deletions src/types/ut_types/Types.Unit.Tests.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(SolutionDir)src\common.build.pre.props" />
<ItemGroup>
<ClCompile Include="UTF8OutPipeReaderTests.cpp" />
<ClCompile Include="UtilsTests.cpp" />
<ClCompile Include="UuidTests.cpp" />
<ClCompile Include="..\precomp.cpp">
Expand Down
Loading