diff --git a/src/buffer/out/OutputCellIterator.cpp b/src/buffer/out/OutputCellIterator.cpp index 715c792af4..616d36c4a8 100644 --- a/src/buffer/out/OutputCellIterator.cpp +++ b/src/buffer/out/OutputCellIterator.cpp @@ -82,7 +82,7 @@ OutputCellIterator::OutputCellIterator(const CHAR_INFO& charInfo, const size_t f // - This is an iterator over a range of text only. No color data will be modified as the text is inserted. // Arguments: // - utf16Text - UTF-16 text range -OutputCellIterator::OutputCellIterator(const std::wstring_view utf16Text) : +OutputCellIterator::OutputCellIterator(const std::wstring_view utf16Text) noexcept : _mode(Mode::LooseTextOnly), _currentView(s_GenerateView(utf16Text)), _run(utf16Text), @@ -98,7 +98,7 @@ OutputCellIterator::OutputCellIterator(const std::wstring_view utf16Text) : // Arguments: // - utf16Text - UTF-16 text range // - attribute - Color to apply over the entire range -OutputCellIterator::OutputCellIterator(const std::wstring_view utf16Text, const TextAttribute& attribute, const size_t fillLimit) : +OutputCellIterator::OutputCellIterator(const std::wstring_view utf16Text, const TextAttribute& attribute, const size_t fillLimit) noexcept : _mode(Mode::Loose), _currentView(s_GenerateView(utf16Text, attribute)), _run(utf16Text), @@ -357,7 +357,7 @@ bool OutputCellIterator::_TryMoveTrailing() noexcept // - view - View representing characters corresponding to a single glyph // Return Value: // - Object representing the view into this cell -OutputCellView OutputCellIterator::s_GenerateView(const std::wstring_view view) +OutputCellView OutputCellIterator::s_GenerateView(const std::wstring_view view) noexcept { return s_GenerateView(view, InvalidTextAttribute, TextAttributeBehavior::Current); } @@ -372,8 +372,7 @@ OutputCellView OutputCellIterator::s_GenerateView(const std::wstring_view view) // - attr - Color attributes to apply to the text // Return Value: // - Object representing the view into this cell -OutputCellView OutputCellIterator::s_GenerateView(const std::wstring_view view, - const TextAttribute attr) +OutputCellView OutputCellIterator::s_GenerateView(const std::wstring_view view, const TextAttribute attr) noexcept { return s_GenerateView(view, attr, TextAttributeBehavior::Stored); } @@ -389,9 +388,7 @@ OutputCellView OutputCellIterator::s_GenerateView(const std::wstring_view view, // - behavior - Behavior of the given text attribute (used when writing) // Return Value: // - Object representing the view into this cell -OutputCellView OutputCellIterator::s_GenerateView(const std::wstring_view view, - const TextAttribute attr, - const TextAttributeBehavior behavior) +OutputCellView OutputCellIterator::s_GenerateView(const std::wstring_view view, const TextAttribute attr, const TextAttributeBehavior behavior) noexcept { const auto glyph = til::utf16_next(view); const auto dbcsAttr = IsGlyphFullWidth(glyph) ? DbcsAttribute::Leading : DbcsAttribute::Single; diff --git a/src/buffer/out/OutputCellIterator.hpp b/src/buffer/out/OutputCellIterator.hpp index 89447a5102..2229dca25a 100644 --- a/src/buffer/out/OutputCellIterator.hpp +++ b/src/buffer/out/OutputCellIterator.hpp @@ -37,8 +37,8 @@ public: OutputCellIterator(const TextAttribute& attr, const size_t fillLimit = 0) noexcept; OutputCellIterator(const wchar_t& wch, const TextAttribute& attr, const size_t fillLimit = 0) noexcept; OutputCellIterator(const CHAR_INFO& charInfo, const size_t fillLimit = 0) noexcept; - OutputCellIterator(const std::wstring_view utf16Text); - OutputCellIterator(const std::wstring_view utf16Text, const TextAttribute& attribute, const size_t fillLimit = 0); + OutputCellIterator(const std::wstring_view utf16Text) noexcept; + OutputCellIterator(const std::wstring_view utf16Text, const TextAttribute& attribute, const size_t fillLimit = 0) noexcept; OutputCellIterator(const gsl::span legacyAttributes) noexcept; OutputCellIterator(const gsl::span charInfos) noexcept; OutputCellIterator(const gsl::span cells); @@ -100,15 +100,9 @@ private: bool _TryMoveTrailing() noexcept; - static OutputCellView s_GenerateView(const std::wstring_view view); - - static OutputCellView s_GenerateView(const std::wstring_view view, - const TextAttribute attr); - - static OutputCellView s_GenerateView(const std::wstring_view view, - const TextAttribute attr, - const TextAttributeBehavior behavior); - + static OutputCellView s_GenerateView(const std::wstring_view view) noexcept; + static OutputCellView s_GenerateView(const std::wstring_view view, const TextAttribute attr) noexcept; + static OutputCellView s_GenerateView(const std::wstring_view view, const TextAttribute attr, const TextAttributeBehavior behavior) noexcept; static OutputCellView s_GenerateView(const wchar_t& wch) noexcept; static OutputCellView s_GenerateViewLegacyAttr(const WORD& legacyAttr) noexcept; static OutputCellView s_GenerateView(const TextAttribute& attr) noexcept; diff --git a/src/cascadia/PublicTerminalCore/HwndTerminal.cpp b/src/cascadia/PublicTerminalCore/HwndTerminal.cpp index 1d9daf1c54..f5ee0c4191 100644 --- a/src/cascadia/PublicTerminalCore/HwndTerminal.cpp +++ b/src/cascadia/PublicTerminalCore/HwndTerminal.cpp @@ -24,25 +24,6 @@ static constexpr bool _IsMouseMessage(UINT uMsg) uMsg == WM_MOUSEMOVE || uMsg == WM_MOUSEWHEEL || uMsg == WM_MOUSEHWHEEL; } -// Helper static function to ensure that all ambiguous-width glyphs are reported as narrow. -// See microsoft/terminal#2066 for more info. -static bool _IsGlyphWideForceNarrowFallback(const std::wstring_view /* glyph */) noexcept -{ - return false; // glyph is not wide. -} - -static bool _EnsureStaticInitialization() -{ - // use C++11 magic statics to make sure we only do this once. - static auto initialized = []() { - // *** THIS IS A SINGLETON *** - SetGlyphWidthFallback(_IsGlyphWideForceNarrowFallback); - - return true; - }(); - return initialized; -} - LRESULT CALLBACK HwndTerminal::HwndTerminalWndProc( HWND hwnd, UINT uMsg, @@ -175,7 +156,7 @@ static bool RegisterTermClass(HINSTANCE hInstance) noexcept return RegisterClassW(&wc) != 0; } -HwndTerminal::HwndTerminal(HWND parentHwnd) : +HwndTerminal::HwndTerminal(HWND parentHwnd) noexcept : _desiredFont{ L"Consolas", 0, DEFAULT_FONT_WEIGHT, 14, CP_UTF8 }, _actualFont{ L"Consolas", 0, DEFAULT_FONT_WEIGHT, { 0, 14 }, CP_UTF8, false }, _uiaProvider{ nullptr }, @@ -183,8 +164,6 @@ HwndTerminal::HwndTerminal(HWND parentHwnd) : _pfnWriteCallback{ nullptr }, _multiClickTime{ 500 } // this will be overwritten by the windows system double-click time { - _EnsureStaticInitialization(); - auto hInstance = wil::GetModuleInstanceHandle(); if (RegisterTermClass(hInstance)) diff --git a/src/cascadia/PublicTerminalCore/HwndTerminal.hpp b/src/cascadia/PublicTerminalCore/HwndTerminal.hpp index 6df223e804..4b71991a89 100644 --- a/src/cascadia/PublicTerminalCore/HwndTerminal.hpp +++ b/src/cascadia/PublicTerminalCore/HwndTerminal.hpp @@ -49,7 +49,7 @@ __declspec(dllexport) void _stdcall TerminalKillFocus(void* terminal); struct HwndTerminal : ::Microsoft::Console::Types::IControlAccessibilityInfo { public: - HwndTerminal(HWND hwnd); + HwndTerminal(HWND hwnd) noexcept; HwndTerminal(const HwndTerminal&) = default; HwndTerminal(HwndTerminal&&) = default; diff --git a/src/cascadia/TerminalControl/ControlCore.cpp b/src/cascadia/TerminalControl/ControlCore.cpp index 02dce216b2..ded43ff89d 100644 --- a/src/cascadia/TerminalControl/ControlCore.cpp +++ b/src/cascadia/TerminalControl/ControlCore.cpp @@ -63,25 +63,6 @@ namespace winrt::Microsoft::Terminal::Control::implementation return result; } - // Helper static function to ensure that all ambiguous-width glyphs are reported as narrow. - // See microsoft/terminal#2066 for more info. - static bool _IsGlyphWideForceNarrowFallback(const std::wstring_view /* glyph */) - { - return false; // glyph is not wide. - } - - static bool _EnsureStaticInitialization() - { - // use C++11 magic statics to make sure we only do this once. - static auto initialized = []() { - // *** THIS IS A SINGLETON *** - SetGlyphWidthFallback(_IsGlyphWideForceNarrowFallback); - - return true; - }(); - return initialized; - } - TextColor SelectionColor::AsTextColor() const noexcept { if (_IsIndex16) @@ -101,8 +82,6 @@ namespace winrt::Microsoft::Terminal::Control::implementation _desiredFont{ DEFAULT_FONT_FACE, 0, DEFAULT_FONT_WEIGHT, DEFAULT_FONT_SIZE, CP_UTF8 }, _actualFont{ DEFAULT_FONT_FACE, 0, DEFAULT_FONT_WEIGHT, { 0, DEFAULT_FONT_SIZE }, CP_UTF8, false } { - _EnsureStaticInitialization(); - _settings = winrt::make_self(settings, unfocusedAppearance); _terminal = std::make_shared<::Microsoft::Terminal::Core::Terminal>(); diff --git a/src/host/ut_host/CodepointWidthDetectorTests.cpp b/src/host/ut_host/CodepointWidthDetectorTests.cpp index fdbc775e13..9f433eb7c3 100644 --- a/src/host/ut_host/CodepointWidthDetectorTests.cpp +++ b/src/host/ut_host/CodepointWidthDetectorTests.cpp @@ -19,7 +19,7 @@ static const std::vector> { 0x7, L"\a", CodepointWidth::Narrow }, // BEL { 0x20, L" ", CodepointWidth::Narrow }, { 0x39, L"9", CodepointWidth::Narrow }, - { 0x414, L"\x414", CodepointWidth::Ambiguous }, // U+0414 cyrillic capital de + { 0x414, L"\x414", CodepointWidth::Narrow }, // U+0414 cyrillic capital de { 0x1104, L"\x1104", CodepointWidth::Wide }, // U+1104 hangul choseong ssangtikeut { 0x306A, L"\x306A", CodepointWidth::Wide }, // U+306A hiragana na { 0x30CA, L"\x30CA", CodepointWidth::Wide }, // U+30CA katakana na @@ -38,18 +38,6 @@ class CodepointWidthDetectorTests VERIFY_IS_TRUE(widthDetector.IsWide(emoji)); } - TEST_METHOD(CanExtractCodepoint) - { - CodepointWidthDetector widthDetector; - for (const auto& data : testData) - { - const auto& expected = std::get<0>(data); - const auto& wstr = std::get<1>(data); - const auto result = widthDetector._extractCodepoint({ wstr.c_str(), wstr.size() }); - VERIFY_ARE_EQUAL(result, expected); - } - } - TEST_METHOD(CanGetWidths) { CodepointWidthDetector widthDetector; @@ -91,8 +79,8 @@ class CodepointWidthDetectorTests // Cached item should match what we expect const auto it = widthDetector._fallbackCache.begin(); - VERIFY_ARE_EQUAL(ambiguous, it->first); - VERIFY_ARE_EQUAL(FallbackMethod(ambiguous), it->second); + VERIFY_ARE_EQUAL(ambiguous[0], it->first); + VERIFY_ARE_EQUAL(FallbackMethod(ambiguous) ? 2u : 1u, it->second); // Cache should empty when font changes. widthDetector.NotifyFontChanged(); diff --git a/src/interactivity/base/EventSynthesis.cpp b/src/interactivity/base/EventSynthesis.cpp index 398b72730c..3b283b001a 100644 --- a/src/interactivity/base/EventSynthesis.cpp +++ b/src/interactivity/base/EventSynthesis.cpp @@ -38,7 +38,7 @@ static CodepointWidth GetQuickCharWidthLegacyForNumpadEventSynthesis(const wchar return CodepointWidth::Wide; } - return CodepointWidth::Invalid; + return CodepointWidth::Narrow; } std::deque> Microsoft::Console::Interactivity::CharToKeyEvents(const wchar_t wch, diff --git a/src/types/CodepointWidthDetector.cpp b/src/types/CodepointWidthDetector.cpp index c48b90fb9c..6848aa5b61 100644 --- a/src/types/CodepointWidthDetector.cpp +++ b/src/types/CodepointWidthDetector.cpp @@ -9,9 +9,9 @@ namespace // used to store range data in CodepointWidthDetector's internal map struct UnicodeRange final { - unsigned int lowerBound; - unsigned int upperBound; - CodepointWidth width; + char32_t lowerBound; + char32_t upperBound : 31; + char32_t isAmbiguous : 1; }; static bool operator<(const UnicodeRange& range, const unsigned int searchTerm) noexcept @@ -19,382 +19,348 @@ namespace return range.upperBound < searchTerm; } - // Generated by Generate-CodepointWidthsFromUCD.ps1 -Pack:True -Full:False -NoOverrides:False - // on 9/14/2022 7:12:26 PM (UTC) from Unicode 15.0.0. - // 321281 (0x4E701) codepoints covered. + // Generated by Generate-CodepointWidthsFromUCD.ps1 -Pack:True -Full: -NoOverrides:False + // on 2022-11-15 19:54:23Z from Unicode 15.0.0. + // 321149 (0x4E67D) codepoints covered. // 240 (0xF0) codepoints overridden. // Override path: .\src\types\unicode_width_overrides.xml - static constexpr std::array s_wideAndAmbiguousTable{ - UnicodeRange{ 0xa1, 0xa1, CodepointWidth::Ambiguous }, - UnicodeRange{ 0xa4, 0xa4, CodepointWidth::Ambiguous }, - UnicodeRange{ 0xa7, 0xa8, CodepointWidth::Ambiguous }, - UnicodeRange{ 0xaa, 0xaa, CodepointWidth::Ambiguous }, - UnicodeRange{ 0xad, 0xae, CodepointWidth::Ambiguous }, - UnicodeRange{ 0xb0, 0xb4, CodepointWidth::Ambiguous }, - UnicodeRange{ 0xb6, 0xba, CodepointWidth::Ambiguous }, - UnicodeRange{ 0xbc, 0xbf, CodepointWidth::Ambiguous }, - UnicodeRange{ 0xc6, 0xc6, CodepointWidth::Ambiguous }, - UnicodeRange{ 0xd0, 0xd0, CodepointWidth::Ambiguous }, - UnicodeRange{ 0xd7, 0xd8, CodepointWidth::Ambiguous }, - UnicodeRange{ 0xde, 0xe1, CodepointWidth::Ambiguous }, - UnicodeRange{ 0xe6, 0xe6, CodepointWidth::Ambiguous }, - UnicodeRange{ 0xe8, 0xea, CodepointWidth::Ambiguous }, - UnicodeRange{ 0xec, 0xed, CodepointWidth::Ambiguous }, - UnicodeRange{ 0xf0, 0xf0, CodepointWidth::Ambiguous }, - UnicodeRange{ 0xf2, 0xf3, CodepointWidth::Ambiguous }, - UnicodeRange{ 0xf7, 0xfa, CodepointWidth::Ambiguous }, - UnicodeRange{ 0xfc, 0xfc, CodepointWidth::Ambiguous }, - UnicodeRange{ 0xfe, 0xfe, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x101, 0x101, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x111, 0x111, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x113, 0x113, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x11b, 0x11b, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x126, 0x127, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x12b, 0x12b, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x131, 0x133, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x138, 0x138, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x13f, 0x142, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x144, 0x144, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x148, 0x14b, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x14d, 0x14d, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x152, 0x153, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x166, 0x167, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x16b, 0x16b, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x1ce, 0x1ce, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x1d0, 0x1d0, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x1d2, 0x1d2, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x1d4, 0x1d4, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x1d6, 0x1d6, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x1d8, 0x1d8, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x1da, 0x1da, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x1dc, 0x1dc, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x251, 0x251, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x261, 0x261, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2c4, 0x2c4, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2c7, 0x2c7, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2c9, 0x2cb, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2cd, 0x2cd, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2d0, 0x2d0, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2d8, 0x2db, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2dd, 0x2dd, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2df, 0x2df, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x300, 0x36f, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x391, 0x3a1, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x3a3, 0x3a9, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x3b1, 0x3c1, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x3c3, 0x3c9, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x401, 0x401, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x410, 0x44f, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x451, 0x451, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x1100, 0x115f, CodepointWidth::Wide }, - UnicodeRange{ 0x2010, 0x2010, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2013, 0x2016, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2018, 0x2019, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x201c, 0x201d, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2020, 0x2022, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2024, 0x2027, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2030, 0x2030, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2032, 0x2033, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2035, 0x2035, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x203b, 0x203b, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x203e, 0x203e, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2074, 0x2074, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x207f, 0x207f, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2081, 0x2084, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x20ac, 0x20ac, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2103, 0x2103, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2105, 0x2105, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2109, 0x2109, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2113, 0x2113, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2116, 0x2116, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2121, 0x2122, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2126, 0x2126, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x212b, 0x212b, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2153, 0x2154, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x215b, 0x215e, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2160, 0x216b, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2170, 0x2179, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2189, 0x2189, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2190, 0x2199, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x21b8, 0x21b9, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x21d2, 0x21d2, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x21d4, 0x21d4, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x21e7, 0x21e7, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2200, 0x2200, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2202, 0x2203, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2207, 0x2208, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x220b, 0x220b, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x220f, 0x220f, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2211, 0x2211, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2215, 0x2215, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x221a, 0x221a, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x221d, 0x2220, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2223, 0x2223, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2225, 0x2225, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2227, 0x222c, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x222e, 0x222e, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2234, 0x2237, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x223c, 0x223d, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2248, 0x2248, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x224c, 0x224c, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2252, 0x2252, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2260, 0x2261, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2264, 0x2267, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x226a, 0x226b, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x226e, 0x226f, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2282, 0x2283, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2286, 0x2287, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2295, 0x2295, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2299, 0x2299, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x22a5, 0x22a5, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x22bf, 0x22bf, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2312, 0x2312, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x231a, 0x231b, CodepointWidth::Wide }, - UnicodeRange{ 0x2329, 0x232a, CodepointWidth::Wide }, - UnicodeRange{ 0x23e9, 0x23ec, CodepointWidth::Wide }, - UnicodeRange{ 0x23f0, 0x23f0, CodepointWidth::Wide }, - UnicodeRange{ 0x23f3, 0x23f3, CodepointWidth::Wide }, - UnicodeRange{ 0x2460, 0x24e9, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x24eb, 0x24ff, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2500, 0x259f, CodepointWidth::Narrow }, // box-drawing and block elements require 1-cell alignment - UnicodeRange{ 0x25a0, 0x25a1, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x25a3, 0x25a9, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x25b2, 0x25b3, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x25b6, 0x25b7, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x25bc, 0x25bd, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x25c0, 0x25c1, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x25c6, 0x25c8, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x25cb, 0x25cb, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x25ce, 0x25d1, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x25e2, 0x25e5, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x25ef, 0x25ef, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x25fd, 0x25fe, CodepointWidth::Wide }, - UnicodeRange{ 0x2605, 0x2606, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2609, 0x2609, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x260e, 0x260f, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2614, 0x2615, CodepointWidth::Wide }, - UnicodeRange{ 0x261c, 0x261c, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x261e, 0x261e, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2640, 0x2640, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2642, 0x2642, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2648, 0x2653, CodepointWidth::Wide }, - UnicodeRange{ 0x2660, 0x2661, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2663, 0x2665, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2667, 0x266a, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x266c, 0x266d, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x266f, 0x266f, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x267f, 0x267f, CodepointWidth::Wide }, - UnicodeRange{ 0x2693, 0x2693, CodepointWidth::Wide }, - UnicodeRange{ 0x269e, 0x269f, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26a1, 0x26a1, CodepointWidth::Wide }, - UnicodeRange{ 0x26aa, 0x26ab, CodepointWidth::Wide }, - UnicodeRange{ 0x26bd, 0x26be, CodepointWidth::Wide }, - UnicodeRange{ 0x26bf, 0x26bf, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26c4, 0x26c5, CodepointWidth::Wide }, - UnicodeRange{ 0x26c6, 0x26cd, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26ce, 0x26ce, CodepointWidth::Wide }, - UnicodeRange{ 0x26cf, 0x26d3, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26d4, 0x26d4, CodepointWidth::Wide }, - UnicodeRange{ 0x26d5, 0x26e1, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26e3, 0x26e3, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26e8, 0x26e9, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26ea, 0x26ea, CodepointWidth::Wide }, - UnicodeRange{ 0x26eb, 0x26f1, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26f2, 0x26f3, CodepointWidth::Wide }, - UnicodeRange{ 0x26f4, 0x26f4, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26f5, 0x26f5, CodepointWidth::Wide }, - UnicodeRange{ 0x26f6, 0x26f9, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26fa, 0x26fa, CodepointWidth::Wide }, - UnicodeRange{ 0x26fb, 0x26fc, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x26fd, 0x26fd, CodepointWidth::Wide }, - UnicodeRange{ 0x26fe, 0x26ff, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2705, 0x2705, CodepointWidth::Wide }, - UnicodeRange{ 0x270a, 0x270b, CodepointWidth::Wide }, - UnicodeRange{ 0x2728, 0x2728, CodepointWidth::Wide }, - UnicodeRange{ 0x273d, 0x273d, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x274c, 0x274c, CodepointWidth::Wide }, - UnicodeRange{ 0x274e, 0x274e, CodepointWidth::Wide }, - UnicodeRange{ 0x2753, 0x2755, CodepointWidth::Wide }, - UnicodeRange{ 0x2757, 0x2757, CodepointWidth::Wide }, - UnicodeRange{ 0x2776, 0x277f, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2795, 0x2797, CodepointWidth::Wide }, - UnicodeRange{ 0x27b0, 0x27b0, CodepointWidth::Wide }, - UnicodeRange{ 0x27bf, 0x27bf, CodepointWidth::Wide }, - UnicodeRange{ 0x2b1b, 0x2b1c, CodepointWidth::Wide }, - UnicodeRange{ 0x2b50, 0x2b50, CodepointWidth::Wide }, - UnicodeRange{ 0x2b55, 0x2b55, CodepointWidth::Wide }, - UnicodeRange{ 0x2b56, 0x2b59, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x2e80, 0x2e99, CodepointWidth::Wide }, - UnicodeRange{ 0x2e9b, 0x2ef3, CodepointWidth::Wide }, - UnicodeRange{ 0x2f00, 0x2fd5, CodepointWidth::Wide }, - UnicodeRange{ 0x2ff0, 0x2ffb, CodepointWidth::Wide }, - UnicodeRange{ 0x3000, 0x303e, CodepointWidth::Wide }, - UnicodeRange{ 0x3041, 0x3096, CodepointWidth::Wide }, - UnicodeRange{ 0x3099, 0x30ff, CodepointWidth::Wide }, - UnicodeRange{ 0x3105, 0x312f, CodepointWidth::Wide }, - UnicodeRange{ 0x3131, 0x318e, CodepointWidth::Wide }, - UnicodeRange{ 0x3190, 0x31e3, CodepointWidth::Wide }, - UnicodeRange{ 0x31f0, 0x321e, CodepointWidth::Wide }, - UnicodeRange{ 0x3220, 0x3247, CodepointWidth::Wide }, - UnicodeRange{ 0x3248, 0x324f, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x3250, 0x4dbf, CodepointWidth::Wide }, - UnicodeRange{ 0x4dc0, 0x4dff, CodepointWidth::Narrow }, // hexagrams are historically narrow - UnicodeRange{ 0x4e00, 0xa48c, CodepointWidth::Wide }, - UnicodeRange{ 0xa490, 0xa4c6, CodepointWidth::Wide }, - UnicodeRange{ 0xa960, 0xa97c, CodepointWidth::Wide }, - UnicodeRange{ 0xac00, 0xd7a3, CodepointWidth::Wide }, - UnicodeRange{ 0xe000, 0xf8ff, CodepointWidth::Ambiguous }, - UnicodeRange{ 0xf900, 0xfaff, CodepointWidth::Wide }, - UnicodeRange{ 0xfe00, 0xfe0f, CodepointWidth::Ambiguous }, - UnicodeRange{ 0xfe10, 0xfe19, CodepointWidth::Wide }, - UnicodeRange{ 0xfe20, 0xfe2f, CodepointWidth::Narrow }, // narrow combining ligatures (split into left/right halves, which take 2 columns together) - UnicodeRange{ 0xfe30, 0xfe52, CodepointWidth::Wide }, - UnicodeRange{ 0xfe54, 0xfe66, CodepointWidth::Wide }, - UnicodeRange{ 0xfe68, 0xfe6b, CodepointWidth::Wide }, - UnicodeRange{ 0xff01, 0xff60, CodepointWidth::Wide }, - UnicodeRange{ 0xffe0, 0xffe6, CodepointWidth::Wide }, - UnicodeRange{ 0xfffd, 0xfffd, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x16fe0, 0x16fe4, CodepointWidth::Wide }, - UnicodeRange{ 0x16ff0, 0x16ff1, CodepointWidth::Wide }, - UnicodeRange{ 0x17000, 0x187f7, CodepointWidth::Wide }, - UnicodeRange{ 0x18800, 0x18cd5, CodepointWidth::Wide }, - UnicodeRange{ 0x18d00, 0x18d08, CodepointWidth::Wide }, - UnicodeRange{ 0x1aff0, 0x1aff3, CodepointWidth::Wide }, - UnicodeRange{ 0x1aff5, 0x1affb, CodepointWidth::Wide }, - UnicodeRange{ 0x1affd, 0x1affe, CodepointWidth::Wide }, - UnicodeRange{ 0x1b000, 0x1b122, CodepointWidth::Wide }, - UnicodeRange{ 0x1b132, 0x1b132, CodepointWidth::Wide }, - UnicodeRange{ 0x1b150, 0x1b152, CodepointWidth::Wide }, - UnicodeRange{ 0x1b155, 0x1b155, CodepointWidth::Wide }, - UnicodeRange{ 0x1b164, 0x1b167, CodepointWidth::Wide }, - UnicodeRange{ 0x1b170, 0x1b2fb, CodepointWidth::Wide }, - UnicodeRange{ 0x1f004, 0x1f004, CodepointWidth::Wide }, - UnicodeRange{ 0x1f0cf, 0x1f0cf, CodepointWidth::Wide }, - UnicodeRange{ 0x1f100, 0x1f10a, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x1f110, 0x1f12d, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x1f130, 0x1f169, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x1f170, 0x1f18d, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x1f18e, 0x1f18e, CodepointWidth::Wide }, - UnicodeRange{ 0x1f18f, 0x1f190, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x1f191, 0x1f19a, CodepointWidth::Wide }, - UnicodeRange{ 0x1f19b, 0x1f1ac, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x1f1e6, 0x1f202, CodepointWidth::Wide }, - UnicodeRange{ 0x1f210, 0x1f23b, CodepointWidth::Wide }, - UnicodeRange{ 0x1f240, 0x1f248, CodepointWidth::Wide }, - UnicodeRange{ 0x1f250, 0x1f251, CodepointWidth::Wide }, - UnicodeRange{ 0x1f260, 0x1f265, CodepointWidth::Wide }, - UnicodeRange{ 0x1f300, 0x1f320, CodepointWidth::Wide }, - UnicodeRange{ 0x1f32d, 0x1f335, CodepointWidth::Wide }, - UnicodeRange{ 0x1f337, 0x1f37c, CodepointWidth::Wide }, - UnicodeRange{ 0x1f37e, 0x1f393, CodepointWidth::Wide }, - UnicodeRange{ 0x1f3a0, 0x1f3ca, CodepointWidth::Wide }, - UnicodeRange{ 0x1f3cf, 0x1f3d3, CodepointWidth::Wide }, - UnicodeRange{ 0x1f3e0, 0x1f3f0, CodepointWidth::Wide }, - UnicodeRange{ 0x1f3f4, 0x1f3f4, CodepointWidth::Wide }, - UnicodeRange{ 0x1f3f8, 0x1f43e, CodepointWidth::Wide }, - UnicodeRange{ 0x1f440, 0x1f440, CodepointWidth::Wide }, - UnicodeRange{ 0x1f442, 0x1f4fc, CodepointWidth::Wide }, - UnicodeRange{ 0x1f4ff, 0x1f53d, CodepointWidth::Wide }, - UnicodeRange{ 0x1f54b, 0x1f54e, CodepointWidth::Wide }, - UnicodeRange{ 0x1f550, 0x1f567, CodepointWidth::Wide }, - UnicodeRange{ 0x1f57a, 0x1f57a, CodepointWidth::Wide }, - UnicodeRange{ 0x1f595, 0x1f596, CodepointWidth::Wide }, - UnicodeRange{ 0x1f5a4, 0x1f5a4, CodepointWidth::Wide }, - UnicodeRange{ 0x1f5fb, 0x1f64f, CodepointWidth::Wide }, - UnicodeRange{ 0x1f680, 0x1f6c5, CodepointWidth::Wide }, - UnicodeRange{ 0x1f6cc, 0x1f6cc, CodepointWidth::Wide }, - UnicodeRange{ 0x1f6d0, 0x1f6d2, CodepointWidth::Wide }, - UnicodeRange{ 0x1f6d5, 0x1f6d7, CodepointWidth::Wide }, - UnicodeRange{ 0x1f6dc, 0x1f6df, CodepointWidth::Wide }, - UnicodeRange{ 0x1f6eb, 0x1f6ec, CodepointWidth::Wide }, - UnicodeRange{ 0x1f6f4, 0x1f6fc, CodepointWidth::Wide }, - UnicodeRange{ 0x1f7e0, 0x1f7eb, CodepointWidth::Wide }, - UnicodeRange{ 0x1f7f0, 0x1f7f0, CodepointWidth::Wide }, - UnicodeRange{ 0x1f90c, 0x1f93a, CodepointWidth::Wide }, - UnicodeRange{ 0x1f93c, 0x1f945, CodepointWidth::Wide }, - UnicodeRange{ 0x1f947, 0x1f9ff, CodepointWidth::Wide }, - UnicodeRange{ 0x1fa70, 0x1fa7c, CodepointWidth::Wide }, - UnicodeRange{ 0x1fa80, 0x1fa88, CodepointWidth::Wide }, - UnicodeRange{ 0x1fa90, 0x1fabd, CodepointWidth::Wide }, - UnicodeRange{ 0x1fabf, 0x1fac5, CodepointWidth::Wide }, - UnicodeRange{ 0x1face, 0x1fadb, CodepointWidth::Wide }, - UnicodeRange{ 0x1fae0, 0x1fae8, CodepointWidth::Wide }, - UnicodeRange{ 0x1faf0, 0x1faf8, CodepointWidth::Wide }, - UnicodeRange{ 0x20000, 0x2fffd, CodepointWidth::Wide }, - UnicodeRange{ 0x30000, 0x3fffd, CodepointWidth::Wide }, - UnicodeRange{ 0xe0100, 0xe01ef, CodepointWidth::Ambiguous }, - UnicodeRange{ 0xf0000, 0xffffd, CodepointWidth::Ambiguous }, - UnicodeRange{ 0x100000, 0x10fffd, CodepointWidth::Ambiguous }, + static constexpr std::array s_wideAndAmbiguousTable{ + UnicodeRange{ 0xa1, 0xa1, 1 }, + UnicodeRange{ 0xa4, 0xa4, 1 }, + UnicodeRange{ 0xa7, 0xa8, 1 }, + UnicodeRange{ 0xaa, 0xaa, 1 }, + UnicodeRange{ 0xad, 0xae, 1 }, + UnicodeRange{ 0xb0, 0xb4, 1 }, + UnicodeRange{ 0xb6, 0xba, 1 }, + UnicodeRange{ 0xbc, 0xbf, 1 }, + UnicodeRange{ 0xc6, 0xc6, 1 }, + UnicodeRange{ 0xd0, 0xd0, 1 }, + UnicodeRange{ 0xd7, 0xd8, 1 }, + UnicodeRange{ 0xde, 0xe1, 1 }, + UnicodeRange{ 0xe6, 0xe6, 1 }, + UnicodeRange{ 0xe8, 0xea, 1 }, + UnicodeRange{ 0xec, 0xed, 1 }, + UnicodeRange{ 0xf0, 0xf0, 1 }, + UnicodeRange{ 0xf2, 0xf3, 1 }, + UnicodeRange{ 0xf7, 0xfa, 1 }, + UnicodeRange{ 0xfc, 0xfc, 1 }, + UnicodeRange{ 0xfe, 0xfe, 1 }, + UnicodeRange{ 0x101, 0x101, 1 }, + UnicodeRange{ 0x111, 0x111, 1 }, + UnicodeRange{ 0x113, 0x113, 1 }, + UnicodeRange{ 0x11b, 0x11b, 1 }, + UnicodeRange{ 0x126, 0x127, 1 }, + UnicodeRange{ 0x12b, 0x12b, 1 }, + UnicodeRange{ 0x131, 0x133, 1 }, + UnicodeRange{ 0x138, 0x138, 1 }, + UnicodeRange{ 0x13f, 0x142, 1 }, + UnicodeRange{ 0x144, 0x144, 1 }, + UnicodeRange{ 0x148, 0x14b, 1 }, + UnicodeRange{ 0x14d, 0x14d, 1 }, + UnicodeRange{ 0x152, 0x153, 1 }, + UnicodeRange{ 0x166, 0x167, 1 }, + UnicodeRange{ 0x16b, 0x16b, 1 }, + UnicodeRange{ 0x1ce, 0x1ce, 1 }, + UnicodeRange{ 0x1d0, 0x1d0, 1 }, + UnicodeRange{ 0x1d2, 0x1d2, 1 }, + UnicodeRange{ 0x1d4, 0x1d4, 1 }, + UnicodeRange{ 0x1d6, 0x1d6, 1 }, + UnicodeRange{ 0x1d8, 0x1d8, 1 }, + UnicodeRange{ 0x1da, 0x1da, 1 }, + UnicodeRange{ 0x1dc, 0x1dc, 1 }, + UnicodeRange{ 0x251, 0x251, 1 }, + UnicodeRange{ 0x261, 0x261, 1 }, + UnicodeRange{ 0x2c4, 0x2c4, 1 }, + UnicodeRange{ 0x2c7, 0x2c7, 1 }, + UnicodeRange{ 0x2c9, 0x2cb, 1 }, + UnicodeRange{ 0x2cd, 0x2cd, 1 }, + UnicodeRange{ 0x2d0, 0x2d0, 1 }, + UnicodeRange{ 0x2d8, 0x2db, 1 }, + UnicodeRange{ 0x2dd, 0x2dd, 1 }, + UnicodeRange{ 0x2df, 0x2df, 1 }, + UnicodeRange{ 0x300, 0x36f, 1 }, + UnicodeRange{ 0x391, 0x3a1, 1 }, + UnicodeRange{ 0x3a3, 0x3a9, 1 }, + UnicodeRange{ 0x3b1, 0x3c1, 1 }, + UnicodeRange{ 0x3c3, 0x3c9, 1 }, + UnicodeRange{ 0x401, 0x401, 1 }, + UnicodeRange{ 0x410, 0x44f, 1 }, + UnicodeRange{ 0x451, 0x451, 1 }, + UnicodeRange{ 0x1100, 0x115f, 0 }, + UnicodeRange{ 0x2010, 0x2010, 1 }, + UnicodeRange{ 0x2013, 0x2016, 1 }, + UnicodeRange{ 0x2018, 0x2019, 1 }, + UnicodeRange{ 0x201c, 0x201d, 1 }, + UnicodeRange{ 0x2020, 0x2022, 1 }, + UnicodeRange{ 0x2024, 0x2027, 1 }, + UnicodeRange{ 0x2030, 0x2030, 1 }, + UnicodeRange{ 0x2032, 0x2033, 1 }, + UnicodeRange{ 0x2035, 0x2035, 1 }, + UnicodeRange{ 0x203b, 0x203b, 1 }, + UnicodeRange{ 0x203e, 0x203e, 1 }, + UnicodeRange{ 0x2074, 0x2074, 1 }, + UnicodeRange{ 0x207f, 0x207f, 1 }, + UnicodeRange{ 0x2081, 0x2084, 1 }, + UnicodeRange{ 0x20ac, 0x20ac, 1 }, + UnicodeRange{ 0x2103, 0x2103, 1 }, + UnicodeRange{ 0x2105, 0x2105, 1 }, + UnicodeRange{ 0x2109, 0x2109, 1 }, + UnicodeRange{ 0x2113, 0x2113, 1 }, + UnicodeRange{ 0x2116, 0x2116, 1 }, + UnicodeRange{ 0x2121, 0x2122, 1 }, + UnicodeRange{ 0x2126, 0x2126, 1 }, + UnicodeRange{ 0x212b, 0x212b, 1 }, + UnicodeRange{ 0x2153, 0x2154, 1 }, + UnicodeRange{ 0x215b, 0x215e, 1 }, + UnicodeRange{ 0x2160, 0x216b, 1 }, + UnicodeRange{ 0x2170, 0x2179, 1 }, + UnicodeRange{ 0x2189, 0x2189, 1 }, + UnicodeRange{ 0x2190, 0x2199, 1 }, + UnicodeRange{ 0x21b8, 0x21b9, 1 }, + UnicodeRange{ 0x21d2, 0x21d2, 1 }, + UnicodeRange{ 0x21d4, 0x21d4, 1 }, + UnicodeRange{ 0x21e7, 0x21e7, 1 }, + UnicodeRange{ 0x2200, 0x2200, 1 }, + UnicodeRange{ 0x2202, 0x2203, 1 }, + UnicodeRange{ 0x2207, 0x2208, 1 }, + UnicodeRange{ 0x220b, 0x220b, 1 }, + UnicodeRange{ 0x220f, 0x220f, 1 }, + UnicodeRange{ 0x2211, 0x2211, 1 }, + UnicodeRange{ 0x2215, 0x2215, 1 }, + UnicodeRange{ 0x221a, 0x221a, 1 }, + UnicodeRange{ 0x221d, 0x2220, 1 }, + UnicodeRange{ 0x2223, 0x2223, 1 }, + UnicodeRange{ 0x2225, 0x2225, 1 }, + UnicodeRange{ 0x2227, 0x222c, 1 }, + UnicodeRange{ 0x222e, 0x222e, 1 }, + UnicodeRange{ 0x2234, 0x2237, 1 }, + UnicodeRange{ 0x223c, 0x223d, 1 }, + UnicodeRange{ 0x2248, 0x2248, 1 }, + UnicodeRange{ 0x224c, 0x224c, 1 }, + UnicodeRange{ 0x2252, 0x2252, 1 }, + UnicodeRange{ 0x2260, 0x2261, 1 }, + UnicodeRange{ 0x2264, 0x2267, 1 }, + UnicodeRange{ 0x226a, 0x226b, 1 }, + UnicodeRange{ 0x226e, 0x226f, 1 }, + UnicodeRange{ 0x2282, 0x2283, 1 }, + UnicodeRange{ 0x2286, 0x2287, 1 }, + UnicodeRange{ 0x2295, 0x2295, 1 }, + UnicodeRange{ 0x2299, 0x2299, 1 }, + UnicodeRange{ 0x22a5, 0x22a5, 1 }, + UnicodeRange{ 0x22bf, 0x22bf, 1 }, + UnicodeRange{ 0x2312, 0x2312, 1 }, + UnicodeRange{ 0x231a, 0x231b, 0 }, + UnicodeRange{ 0x2329, 0x232a, 0 }, + UnicodeRange{ 0x23e9, 0x23ec, 0 }, + UnicodeRange{ 0x23f0, 0x23f0, 0 }, + UnicodeRange{ 0x23f3, 0x23f3, 0 }, + UnicodeRange{ 0x2460, 0x24e9, 1 }, + UnicodeRange{ 0x24eb, 0x24ff, 1 }, + UnicodeRange{ 0x25a0, 0x25a1, 1 }, + UnicodeRange{ 0x25a3, 0x25a9, 1 }, + UnicodeRange{ 0x25b2, 0x25b3, 1 }, + UnicodeRange{ 0x25b6, 0x25b7, 1 }, + UnicodeRange{ 0x25bc, 0x25bd, 1 }, + UnicodeRange{ 0x25c0, 0x25c1, 1 }, + UnicodeRange{ 0x25c6, 0x25c8, 1 }, + UnicodeRange{ 0x25cb, 0x25cb, 1 }, + UnicodeRange{ 0x25ce, 0x25d1, 1 }, + UnicodeRange{ 0x25e2, 0x25e5, 1 }, + UnicodeRange{ 0x25ef, 0x25ef, 1 }, + UnicodeRange{ 0x25fd, 0x25fe, 0 }, + UnicodeRange{ 0x2605, 0x2606, 1 }, + UnicodeRange{ 0x2609, 0x2609, 1 }, + UnicodeRange{ 0x260e, 0x260f, 1 }, + UnicodeRange{ 0x2614, 0x2615, 0 }, + UnicodeRange{ 0x261c, 0x261c, 1 }, + UnicodeRange{ 0x261e, 0x261e, 1 }, + UnicodeRange{ 0x2640, 0x2640, 1 }, + UnicodeRange{ 0x2642, 0x2642, 1 }, + UnicodeRange{ 0x2648, 0x2653, 0 }, + UnicodeRange{ 0x2660, 0x2661, 1 }, + UnicodeRange{ 0x2663, 0x2665, 1 }, + UnicodeRange{ 0x2667, 0x266a, 1 }, + UnicodeRange{ 0x266c, 0x266d, 1 }, + UnicodeRange{ 0x266f, 0x266f, 1 }, + UnicodeRange{ 0x267f, 0x267f, 0 }, + UnicodeRange{ 0x2693, 0x2693, 0 }, + UnicodeRange{ 0x269e, 0x269f, 1 }, + UnicodeRange{ 0x26a1, 0x26a1, 0 }, + UnicodeRange{ 0x26aa, 0x26ab, 0 }, + UnicodeRange{ 0x26bd, 0x26be, 0 }, + UnicodeRange{ 0x26bf, 0x26bf, 1 }, + UnicodeRange{ 0x26c4, 0x26c5, 0 }, + UnicodeRange{ 0x26c6, 0x26cd, 1 }, + UnicodeRange{ 0x26ce, 0x26ce, 0 }, + UnicodeRange{ 0x26cf, 0x26d3, 1 }, + UnicodeRange{ 0x26d4, 0x26d4, 0 }, + UnicodeRange{ 0x26d5, 0x26e1, 1 }, + UnicodeRange{ 0x26e3, 0x26e3, 1 }, + UnicodeRange{ 0x26e8, 0x26e9, 1 }, + UnicodeRange{ 0x26ea, 0x26ea, 0 }, + UnicodeRange{ 0x26eb, 0x26f1, 1 }, + UnicodeRange{ 0x26f2, 0x26f3, 0 }, + UnicodeRange{ 0x26f4, 0x26f4, 1 }, + UnicodeRange{ 0x26f5, 0x26f5, 0 }, + UnicodeRange{ 0x26f6, 0x26f9, 1 }, + UnicodeRange{ 0x26fa, 0x26fa, 0 }, + UnicodeRange{ 0x26fb, 0x26fc, 1 }, + UnicodeRange{ 0x26fd, 0x26fd, 0 }, + UnicodeRange{ 0x26fe, 0x26ff, 1 }, + UnicodeRange{ 0x2705, 0x2705, 0 }, + UnicodeRange{ 0x270a, 0x270b, 0 }, + UnicodeRange{ 0x2728, 0x2728, 0 }, + UnicodeRange{ 0x273d, 0x273d, 1 }, + UnicodeRange{ 0x274c, 0x274c, 0 }, + UnicodeRange{ 0x274e, 0x274e, 0 }, + UnicodeRange{ 0x2753, 0x2755, 0 }, + UnicodeRange{ 0x2757, 0x2757, 0 }, + UnicodeRange{ 0x2776, 0x277f, 1 }, + UnicodeRange{ 0x2795, 0x2797, 0 }, + UnicodeRange{ 0x27b0, 0x27b0, 0 }, + UnicodeRange{ 0x27bf, 0x27bf, 0 }, + UnicodeRange{ 0x2b1b, 0x2b1c, 0 }, + UnicodeRange{ 0x2b50, 0x2b50, 0 }, + UnicodeRange{ 0x2b55, 0x2b55, 0 }, + UnicodeRange{ 0x2b56, 0x2b59, 1 }, + UnicodeRange{ 0x2e80, 0x2e99, 0 }, + UnicodeRange{ 0x2e9b, 0x2ef3, 0 }, + UnicodeRange{ 0x2f00, 0x2fd5, 0 }, + UnicodeRange{ 0x2ff0, 0x2ffb, 0 }, + UnicodeRange{ 0x3000, 0x303e, 0 }, + UnicodeRange{ 0x3041, 0x3096, 0 }, + UnicodeRange{ 0x3099, 0x30ff, 0 }, + UnicodeRange{ 0x3105, 0x312f, 0 }, + UnicodeRange{ 0x3131, 0x318e, 0 }, + UnicodeRange{ 0x3190, 0x31e3, 0 }, + UnicodeRange{ 0x31f0, 0x321e, 0 }, + UnicodeRange{ 0x3220, 0x3247, 0 }, + UnicodeRange{ 0x3248, 0x324f, 1 }, + UnicodeRange{ 0x3250, 0x4dbf, 0 }, + UnicodeRange{ 0x4e00, 0xa48c, 0 }, + UnicodeRange{ 0xa490, 0xa4c6, 0 }, + UnicodeRange{ 0xa960, 0xa97c, 0 }, + UnicodeRange{ 0xac00, 0xd7a3, 0 }, + UnicodeRange{ 0xe000, 0xf8ff, 1 }, + UnicodeRange{ 0xf900, 0xfaff, 0 }, + UnicodeRange{ 0xfe00, 0xfe0f, 1 }, + UnicodeRange{ 0xfe10, 0xfe19, 0 }, + UnicodeRange{ 0xfe30, 0xfe52, 0 }, + UnicodeRange{ 0xfe54, 0xfe66, 0 }, + UnicodeRange{ 0xfe68, 0xfe6b, 0 }, + UnicodeRange{ 0xff01, 0xff60, 0 }, + UnicodeRange{ 0xffe0, 0xffe6, 0 }, + UnicodeRange{ 0xfffd, 0xfffd, 1 }, + UnicodeRange{ 0x16fe0, 0x16fe4, 0 }, + UnicodeRange{ 0x16ff0, 0x16ff1, 0 }, + UnicodeRange{ 0x17000, 0x187f7, 0 }, + UnicodeRange{ 0x18800, 0x18cd5, 0 }, + UnicodeRange{ 0x18d00, 0x18d08, 0 }, + UnicodeRange{ 0x1aff0, 0x1aff3, 0 }, + UnicodeRange{ 0x1aff5, 0x1affb, 0 }, + UnicodeRange{ 0x1affd, 0x1affe, 0 }, + UnicodeRange{ 0x1b000, 0x1b122, 0 }, + UnicodeRange{ 0x1b132, 0x1b132, 0 }, + UnicodeRange{ 0x1b150, 0x1b152, 0 }, + UnicodeRange{ 0x1b155, 0x1b155, 0 }, + UnicodeRange{ 0x1b164, 0x1b167, 0 }, + UnicodeRange{ 0x1b170, 0x1b2fb, 0 }, + UnicodeRange{ 0x1f004, 0x1f004, 0 }, + UnicodeRange{ 0x1f0cf, 0x1f0cf, 0 }, + UnicodeRange{ 0x1f100, 0x1f10a, 1 }, + UnicodeRange{ 0x1f110, 0x1f12d, 1 }, + UnicodeRange{ 0x1f130, 0x1f169, 1 }, + UnicodeRange{ 0x1f170, 0x1f18d, 1 }, + UnicodeRange{ 0x1f18e, 0x1f18e, 0 }, + UnicodeRange{ 0x1f18f, 0x1f190, 1 }, + UnicodeRange{ 0x1f191, 0x1f19a, 0 }, + UnicodeRange{ 0x1f19b, 0x1f1ac, 1 }, + UnicodeRange{ 0x1f1e6, 0x1f202, 0 }, + UnicodeRange{ 0x1f210, 0x1f23b, 0 }, + UnicodeRange{ 0x1f240, 0x1f248, 0 }, + UnicodeRange{ 0x1f250, 0x1f251, 0 }, + UnicodeRange{ 0x1f260, 0x1f265, 0 }, + UnicodeRange{ 0x1f300, 0x1f320, 0 }, + UnicodeRange{ 0x1f32d, 0x1f335, 0 }, + UnicodeRange{ 0x1f337, 0x1f37c, 0 }, + UnicodeRange{ 0x1f37e, 0x1f393, 0 }, + UnicodeRange{ 0x1f3a0, 0x1f3ca, 0 }, + UnicodeRange{ 0x1f3cf, 0x1f3d3, 0 }, + UnicodeRange{ 0x1f3e0, 0x1f3f0, 0 }, + UnicodeRange{ 0x1f3f4, 0x1f3f4, 0 }, + UnicodeRange{ 0x1f3f8, 0x1f43e, 0 }, + UnicodeRange{ 0x1f440, 0x1f440, 0 }, + UnicodeRange{ 0x1f442, 0x1f4fc, 0 }, + UnicodeRange{ 0x1f4ff, 0x1f53d, 0 }, + UnicodeRange{ 0x1f54b, 0x1f54e, 0 }, + UnicodeRange{ 0x1f550, 0x1f567, 0 }, + UnicodeRange{ 0x1f57a, 0x1f57a, 0 }, + UnicodeRange{ 0x1f595, 0x1f596, 0 }, + UnicodeRange{ 0x1f5a4, 0x1f5a4, 0 }, + UnicodeRange{ 0x1f5fb, 0x1f64f, 0 }, + UnicodeRange{ 0x1f680, 0x1f6c5, 0 }, + UnicodeRange{ 0x1f6cc, 0x1f6cc, 0 }, + UnicodeRange{ 0x1f6d0, 0x1f6d2, 0 }, + UnicodeRange{ 0x1f6d5, 0x1f6d7, 0 }, + UnicodeRange{ 0x1f6dc, 0x1f6df, 0 }, + UnicodeRange{ 0x1f6eb, 0x1f6ec, 0 }, + UnicodeRange{ 0x1f6f4, 0x1f6fc, 0 }, + UnicodeRange{ 0x1f7e0, 0x1f7eb, 0 }, + UnicodeRange{ 0x1f7f0, 0x1f7f0, 0 }, + UnicodeRange{ 0x1f90c, 0x1f93a, 0 }, + UnicodeRange{ 0x1f93c, 0x1f945, 0 }, + UnicodeRange{ 0x1f947, 0x1f9ff, 0 }, + UnicodeRange{ 0x1fa70, 0x1fa7c, 0 }, + UnicodeRange{ 0x1fa80, 0x1fa88, 0 }, + UnicodeRange{ 0x1fa90, 0x1fabd, 0 }, + UnicodeRange{ 0x1fabf, 0x1fac5, 0 }, + UnicodeRange{ 0x1face, 0x1fadb, 0 }, + UnicodeRange{ 0x1fae0, 0x1fae8, 0 }, + UnicodeRange{ 0x1faf0, 0x1faf8, 0 }, + UnicodeRange{ 0x20000, 0x2fffd, 0 }, + UnicodeRange{ 0x30000, 0x3fffd, 0 }, + UnicodeRange{ 0xe0100, 0xe01ef, 1 }, + UnicodeRange{ 0xf0000, 0xffffd, 1 }, + UnicodeRange{ 0x100000, 0x10fffd, 1 }, }; } -// Routine Description: -// - Constructs an instance of the CodepointWidthDetector class -CodepointWidthDetector::CodepointWidthDetector() noexcept : - _fallbackCache{}, - _pfnFallbackMethod{} -{ -} - // Routine Description: // - returns the width type of codepoint as fast as we can by using quick lookup table and fallback cache. // Arguments: // - glyph - the utf16 encoded codepoint to search for // Return Value: // - the width type of the codepoint -CodepointWidth CodepointWidthDetector::GetWidth(const std::wstring_view glyph) const +CodepointWidth CodepointWidthDetector::GetWidth(const std::wstring_view& glyph) noexcept { - THROW_HR_IF(E_INVALIDARG, glyph.empty()); - if (glyph.size() == 1) - { - // We first attempt to look at our custom quick lookup table of char width preferences. - const auto width = GetQuickCharWidth(glyph.front()); + char32_t codepoint = 0; - // If it's invalid, the quick width had no opinion, so go to the lookup table. - if (width == CodepointWidth::Invalid) - { - return _lookupGlyphWidthWithCache(glyph); - } - // If it's ambiguous, the quick width wanted us to ask the font directly, try that if we can. - // If not, go to the lookup table. - else if (width == CodepointWidth::Ambiguous) - { - if (_pfnFallbackMethod) - { - return _checkFallbackViaCache(glyph) ? CodepointWidth::Wide : CodepointWidth::Ambiguous; - } - else - { - return _lookupGlyphWidthWithCache(glyph); - } - } - // Otherwise, return Width as it is. - else - { - return width; - } - } - else + switch (glyph.size()) { - return _lookupGlyphWidthWithCache(glyph); + case 1: + codepoint = til::at(glyph, 0); + break; + case 2: + codepoint = (til::at(glyph, 0) & 0x3FF) << 10; + codepoint |= til::at(glyph, 1) & 0x3FF; + codepoint += 0x10000; + break; + default: + codepoint = 0; + break; } -} -// Routine Description: -// - checks if wch is wide. will attempt to fallback as much possible until an answer is determined -// Arguments: -// - wch - the wchar to check width of -// Return Value: -// - true if wch is wide -bool CodepointWidthDetector::IsWide(const wchar_t wch) const noexcept -{ - try + if (codepoint < 0x80) { - return IsWide({ &wch, 1 }); + return CodepointWidth::Narrow; } - CATCH_LOG(); - return true; + // The return value of _lookupGlyphWidth coincides with the enum value of CodepointWidth + // on purpose to allow for this easy conversion to happen. Optimally, we should probably + // remove CodepointWidth altogether to allow for zero-width joiners and other characters. + static_assert(WI_EnumValue(CodepointWidth::Narrow) == 1); + static_assert(WI_EnumValue(CodepointWidth::Wide) == 2); + return static_cast(_lookupGlyphWidth(codepoint, glyph)); } // Routine Description: @@ -403,123 +369,54 @@ bool CodepointWidthDetector::IsWide(const wchar_t wch) const noexcept // - glyph - the utf16 encoded codepoint to check width of // Return Value: // - true if codepoint is wide -bool CodepointWidthDetector::IsWide(const std::wstring_view glyph) const +bool CodepointWidthDetector::IsWide(const std::wstring_view& glyph) noexcept { return GetWidth(glyph) == CodepointWidth::Wide; } -// Routine Description: -// - returns the width type of codepoint by searching the map generated from the unicode spec -// Arguments: -// - glyph - the utf16 encoded codepoint to search for -// Return Value: -// - the width type of the codepoint -CodepointWidth CodepointWidthDetector::_lookupGlyphWidth(const std::wstring_view glyph) const +// GetWidth's slow-path for non-ASCII characters. Returns the number of columns the codepoint takes up in the terminal. +uint8_t CodepointWidthDetector::_lookupGlyphWidth(const char32_t codepoint, const std::wstring_view& glyph) noexcept { - if (glyph.empty()) - { - return CodepointWidth::Invalid; - } - - const auto codepoint = _extractCodepoint(glyph); +#pragma warning(suppress : 26447) // The function is declared 'noexcept' but calls function 'lower_bound<...>()' which may throw exceptions (f.6). const auto it = std::lower_bound(s_wideAndAmbiguousTable.begin(), s_wideAndAmbiguousTable.end(), codepoint); + uint8_t width = 1; - // For characters that are not _in_ the table, lower_bound will return the nearest item that is. - // We must check its bounds to make sure that our hit was a true hit. if (it != s_wideAndAmbiguousTable.end() && codepoint >= it->lowerBound && codepoint <= it->upperBound) { - return it->width; - } - - return CodepointWidth::Narrow; -} - -// Routine Description: -// - returns the width type of codepoint using fallback methods. -// Arguments: -// - glyph - the utf16 encoded codepoint to check width of -// Return Value: -// - the width type of the codepoint -CodepointWidth CodepointWidthDetector::_lookupGlyphWidthWithCache(const std::wstring_view glyph) const noexcept -{ - try - { - // Use our generated table to try to lookup the width based on the Unicode standard. - const auto width = _lookupGlyphWidth(glyph); - - // If it's ambiguous, then ask the font if we can. - if (width == CodepointWidth::Ambiguous) + width = 2; + if (it->isAmbiguous) { - if (_pfnFallbackMethod) - { - return _checkFallbackViaCache(glyph) ? CodepointWidth::Wide : CodepointWidth::Ambiguous; - } - else - { - return CodepointWidth::Ambiguous; - } - } - // If it's not ambiguous, it should say wide or narrow. - else - { - return width; + width = _checkFallbackViaCache(codepoint, glyph); } } - CATCH_LOG(); - // If we got this far, we couldn't figure it out. - // It's better to be too wide than too narrow. - return CodepointWidth::Wide; + return width; } -// Routine Description: -// - Checks the fallback function but caches the results until the font changes -// because the lookup function is usually very expensive and will return the same results -// for the same inputs. -// Arguments: -// - glyph - the utf16 encoded codepoint to check width of -// - true if codepoint is wide or false if it is narrow -bool CodepointWidthDetector::_checkFallbackViaCache(const std::wstring_view glyph) const +// Call the function specified via SetFallbackMethod() to turn CodepointWidth::Ambiguous into Narrow/Wide. +// Caches the results in _fallbackCache. This is _lookupGlyphWidth's even-slower-path. +uint8_t CodepointWidthDetector::_checkFallbackViaCache(const char32_t codepoint, const std::wstring_view& glyph) noexcept +try { - const std::wstring findMe{ glyph }; - - // TODO: Cache needs to be emptied when font changes. - const auto it = _fallbackCache.find(findMe); - if (it == _fallbackCache.end()) + // Ambiguous glyphs are considered narrow by default. See microsoft/terminal#2066 for more info. + if (!_pfnFallbackMethod) { - auto result = _pfnFallbackMethod(glyph); - _fallbackCache.insert_or_assign(findMe, result); - return result; + return 1; } - else + + if (const auto it = _fallbackCache.find(codepoint); it != _fallbackCache.end()) { return it->second; } -} -// Routine Description: -// - extract unicode codepoint from utf16 encoding -// Arguments: -// - glyph - the utf16 encoded codepoint convert -// Return Value: -// - the codepoint being stored -unsigned int CodepointWidthDetector::_extractCodepoint(const std::wstring_view glyph) noexcept + const uint8_t width = _pfnFallbackMethod(glyph) ? 2 : 1; + _fallbackCache.insert_or_assign(codepoint, width); + return width; +} +catch (...) { - if (glyph.size() == 1) - { - return static_cast(glyph.front()); - } - else - { - const unsigned int mask = 0x3FF; - // leading bits, shifted over to make space for trailing bits - auto codepoint = (glyph.at(0) & mask) << 10; - // trailing bits - codepoint |= (glyph.at(1) & mask); - // 0x10000 is subtracted from the codepoint to encode a surrogate pair, add it back - codepoint += 0x10000; - return codepoint; - } + LOG_CAUGHT_EXCEPTION(); + return 1; } // Method Description: @@ -532,9 +429,9 @@ unsigned int CodepointWidthDetector::_extractCodepoint(const std::wstring_view g // - pfnFallback - the function to use as the fallback method. // Return Value: // - -void CodepointWidthDetector::SetFallbackMethod(std::function pfnFallback) +void CodepointWidthDetector::SetFallbackMethod(std::function pfnFallback) noexcept { - _pfnFallbackMethod = pfnFallback; + _pfnFallbackMethod = std::move(pfnFallback); } // Method Description: @@ -545,7 +442,8 @@ void CodepointWidthDetector::SetFallbackMethod(std::function // Return Value: // - -void CodepointWidthDetector::NotifyFontChanged() const noexcept +void CodepointWidthDetector::NotifyFontChanged() noexcept { +#pragma warning(suppress : 26447) // The function is declared 'noexcept' but calls function 'clear()' which may throw exceptions (f.6). _fallbackCache.clear(); } diff --git a/src/types/GlyphWidth.cpp b/src/types/GlyphWidth.cpp index f90c79f948..2f7d0578a4 100644 --- a/src/types/GlyphWidth.cpp +++ b/src/types/GlyphWidth.cpp @@ -12,7 +12,7 @@ static CodepointWidthDetector widthDetector; // Function Description: // - determines if the glyph represented by the string of characters should be // wide or not. See CodepointWidthDetector::IsWide -bool IsGlyphFullWidth(const std::wstring_view glyph) +bool IsGlyphFullWidth(const std::wstring_view& glyph) noexcept { return widthDetector.IsWide(glyph); } @@ -22,7 +22,7 @@ bool IsGlyphFullWidth(const std::wstring_view glyph) // wide or not. See CodepointWidthDetector::IsWide bool IsGlyphFullWidth(const wchar_t wch) noexcept { - return widthDetector.IsWide(wch); + return wch < 0x80 ? false : IsGlyphFullWidth({ &wch, 1 }); } // Function Description: @@ -35,9 +35,9 @@ bool IsGlyphFullWidth(const wchar_t wch) noexcept // - pfnFallback - the function to use as the fallback method. // Return Value: // - -void SetGlyphWidthFallback(std::function pfnFallback) +void SetGlyphWidthFallback(std::function pfnFallback) noexcept { - widthDetector.SetFallbackMethod(pfnFallback); + widthDetector.SetFallbackMethod(std::move(pfnFallback)); } // Function Description: diff --git a/src/types/convert.cpp b/src/types/convert.cpp index 4427e93dae..abc03d5d3d 100644 --- a/src/types/convert.cpp +++ b/src/types/convert.cpp @@ -130,40 +130,6 @@ return cchTarget; } -// Routine Description: -// - naively determines the width of a UCS2 encoded wchar -// Arguments: -// - wch - the wchar_t to measure -// Return Value: -// - CodepointWidth indicating width of wch -// Notes: -// 04-08-92 ShunK Created. -// Jul-27-1992 KazuM Added Screen Information and Code Page Information. -// Jan-29-1992 V-Hirots Substruct Screen Information. -// Oct-06-1996 KazuM Not use RtlUnicodeToMultiByteSize and WideCharToMultiByte -// Because 950 (Chinese Traditional) only defined 13500 chars, -// and unicode defined almost 18000 chars. -// So there are almost 4000 chars can not be mapped to big5 code. -// Apr-30-2015 MiNiksa Corrected unknown character code assumption. Max Width in Text Metric -// is not reliable for calculating half/full width. Must use current -// display font data (cached) instead. -// May-23-2017 migrie Forced Box-Drawing Characters (x2500-x257F) to narrow. -// Jan-16-2018 migrie Separated core lookup from asking the renderer the width -// May-01-2019 MiNiksa Forced lookup-via-renderer for retroactively recategorized emoji -// that used to be narrow but now might be wide. (approx x2194-x2b55, not inclusive) -// Also forced block characters segment (x2580-x259F) to narrow -// Oct-25-2020 DuHowett Replaced the entire table with a set of overrides that get built into -// CodepointWidthDetector (unicode_width_overrides.xml) -CodepointWidth GetQuickCharWidth(const wchar_t wch) noexcept -{ - if (0x20 <= wch && wch <= 0x7e) - { - /* ASCII */ - return CodepointWidth::Narrow; - } - return CodepointWidth::Invalid; -} - wchar_t Utf16ToUcs2(const std::wstring_view charData) { THROW_HR_IF(E_INVALIDARG, charData.empty()); diff --git a/src/types/inc/CodepointWidthDetector.hpp b/src/types/inc/CodepointWidthDetector.hpp index 810b4bd7bd..79d0f3e858 100644 --- a/src/types/inc/CodepointWidthDetector.hpp +++ b/src/types/inc/CodepointWidthDetector.hpp @@ -14,38 +14,24 @@ Author: #pragma once #include "convert.hpp" -#include - -static_assert(sizeof(unsigned int) == sizeof(wchar_t) * 2, - "UnicodeRange expects to be able to store a unicode codepoint in an unsigned int"); // use to measure the width of a codepoint class CodepointWidthDetector final { public: - CodepointWidthDetector() noexcept; - CodepointWidthDetector(const CodepointWidthDetector&) = delete; - CodepointWidthDetector(CodepointWidthDetector&&) = delete; - ~CodepointWidthDetector() = default; - CodepointWidthDetector& operator=(const CodepointWidthDetector&) = delete; - CodepointWidthDetector& operator=(CodepointWidthDetector&&) = delete; - - CodepointWidth GetWidth(const std::wstring_view glyph) const; - bool IsWide(const std::wstring_view glyph) const; - bool IsWide(const wchar_t wch) const noexcept; - void SetFallbackMethod(std::function pfnFallback); - void NotifyFontChanged() const noexcept; + CodepointWidth GetWidth(const std::wstring_view& glyph) noexcept; + bool IsWide(const std::wstring_view& glyph) noexcept; + void SetFallbackMethod(std::function pfnFallback) noexcept; + void NotifyFontChanged() noexcept; #ifdef UNIT_TESTING friend class CodepointWidthDetectorTests; #endif private: - CodepointWidth _lookupGlyphWidth(const std::wstring_view glyph) const; - CodepointWidth _lookupGlyphWidthWithCache(const std::wstring_view glyph) const noexcept; - bool _checkFallbackViaCache(const std::wstring_view glyph) const; - static unsigned int _extractCodepoint(const std::wstring_view glyph) noexcept; + uint8_t _lookupGlyphWidth(char32_t codepoint, const std::wstring_view& glyph) noexcept; + uint8_t _checkFallbackViaCache(char32_t codepoint, const std::wstring_view& glyph) noexcept; - mutable std::unordered_map _fallbackCache; - std::function _pfnFallbackMethod; + std::unordered_map _fallbackCache; + std::function _pfnFallbackMethod; }; diff --git a/src/types/inc/GlyphWidth.hpp b/src/types/inc/GlyphWidth.hpp index 7888915beb..11982bf5a0 100644 --- a/src/types/inc/GlyphWidth.hpp +++ b/src/types/inc/GlyphWidth.hpp @@ -8,11 +8,14 @@ Abstract: - Helpers for determining the width of a particular string of chars. */ +#pragma once #include #include -bool IsGlyphFullWidth(const std::wstring_view glyph); +#include "convert.hpp" + +bool IsGlyphFullWidth(const std::wstring_view& glyph) noexcept; bool IsGlyphFullWidth(const wchar_t wch) noexcept; -void SetGlyphWidthFallback(std::function pfnFallback); +void SetGlyphWidthFallback(std::function pfnFallback) noexcept; void NotifyGlyphWidthFontChanged() noexcept; diff --git a/src/types/inc/convert.hpp b/src/types/inc/convert.hpp index 6223a07dec..d4cd299c91 100644 --- a/src/types/inc/convert.hpp +++ b/src/types/inc/convert.hpp @@ -19,10 +19,9 @@ Author: enum class CodepointWidth : BYTE { + Ambiguous = 0, // could be narrow or wide depending on the current codepage and font Narrow, Wide, - Ambiguous, // could be narrow or wide depending on the current codepage and font - Invalid // not a valid unicode codepoint }; [[nodiscard]] std::wstring ConvertToW(const UINT codepage, @@ -34,6 +33,4 @@ enum class CodepointWidth : BYTE [[nodiscard]] size_t GetALengthFromW(const UINT codepage, const std::wstring_view source); -CodepointWidth GetQuickCharWidth(const wchar_t wch) noexcept; - wchar_t Utf16ToUcs2(const std::wstring_view charData); diff --git a/tools/Generate-CodepointWidthsFromUCD.ps1 b/tools/Generate-CodepointWidthsFromUCD.ps1 index d1eb56ea65..beee858abc 100644 --- a/tools/Generate-CodepointWidthsFromUCD.ps1 +++ b/tools/Generate-CodepointWidthsFromUCD.ps1 @@ -20,8 +20,8 @@ # significantly smaller, which would provide a performance win on the admittedly # extremely rare occasion that we should need to regenerate our table. # -# Invoke as ./Generate-xxx ucd.nounihan.flat.xml -Pack | Out-File -Encoding -# UTF-8 Temporary.cpp +# Invoke this script from the root of this repository as: +# .\tools\Generate-CodepointWidthsFromUCD.ps1 -Path .\path\to\ucd.nounihan.flat.xml -OverridePath .\src\types\unicode_width_overrides.xml -Pack # # [1]: https://www.unicode.org/Public/UCD/latest/ucdxml/ # [2]: https://www.unicode.org/reports/tr42/ @@ -43,15 +43,13 @@ Param( [string]$OverridePath = "overrides.xml", [switch]$Pack, # Pack tightly based on width - [switch]$NoOverrides, # Do not include overrides - [switch]$Full = $False # Include Narrow codepoints + [switch]$NoOverrides # Do not include overrides ) Enum CodepointWidth { Narrow; Wide; Ambiguous; - Invalid; } # UCD Functions {{{ @@ -82,8 +80,8 @@ Function Get-UCDEntryWidth($entry) { "W" { [CodepointWidth]::Wide; Return } "F" { [CodepointWidth]::Wide; Return } "A" { [CodepointWidth]::Ambiguous; Return } + default { throw "Unexpected East_Asian_Width property" } } - [CodepointWidth]::Invalid } Function Get-UCDEntryFlags($entry) { @@ -224,20 +222,10 @@ $UCDRepertoire = $InputObject.ucd.repertoire.ChildNodes | Sort-Object { } } -If (-not $Full) { - $UCDRepertoire = $UCDRepertoire | Where-Object { - # Select everything Wide/Ambiguous/Full OR Emoji w/ Emoji Presentation - ($_.ea -notin "N", "Na", "H") -or ($_.Emoji -eq "Y" -and $_.EPres -eq "Y") - } -} - $ranges = [UnicodeRangeList]::New(1024) -$c = 0 ForEach($v in $UCDRepertoire) { $range = [UnicodeRange]::new($v) - $c += $range.Length() - If ($ranges.Count -gt 0 -and $ranges[$ranges.Count - 1].Merge($range)) { # Merged into last entry Continue @@ -260,9 +248,16 @@ If (-not $NoOverrides) { } } +$ranges.RemoveAll({ $args[0].Width -eq [CodepointWidth]::Narrow }) | Out-Null + +$c = 0 +ForEach($_ in $ranges) { + $c += $_.End - $_.Start + 1 +} + # Emit Code " // Generated by {0} -Pack:{1} -Full:{2} -NoOverrides:{3}" -f $MyInvocation.MyCommand.Name, $Pack, $Full, $NoOverrides -" // on {0} (UTC) from {1}." -f (Get-Date -AsUTC), $InputObject.ucd.description +" // on {0} from {1}." -f (Get-Date -AsUTC -Format "u"), $InputObject.ucd.description " // {0} (0x{0:X}) codepoints covered." -f $c If (-not $NoOverrides) { " // {0} (0x{0:X}) codepoints overridden." -f $overrideCount @@ -270,11 +265,12 @@ If (-not $NoOverrides) { } " static constexpr std::array s_wideAndAmbiguousTable{{" -f $ranges.Count ForEach($_ in $ranges) { + $isAmbiguous = $_.Width -eq [CodepointWidth]::Ambiguous $comment = "" if ($null -ne $_.Comment) { # We only vend comments when we aren't packing tightly $comment = " // {0}" -f $_.Comment } -" UnicodeRange{{ 0x{0:x}, 0x{1:x}, CodepointWidth::{2} }},{3}" -f $_.Start, $_.End, $_.Width, $comment +" UnicodeRange{{ 0x{0:x}, 0x{1:x}, {2} }},{3}" -f $_.Start, $_.End, [int]$isAmbiguous, $comment } " };"