| // <text_encoding> -*- C++ -*- |
| |
| // Copyright The GNU Toolchain Authors. |
| // |
| // This file is part of the GNU ISO C++ Library. This library is free |
| // software; you can redistribute it and/or modify it under the |
| // terms of the GNU General Public License as published by the |
| // Free Software Foundation; either version 3, or (at your option) |
| // any later version. |
| |
| // This library is distributed in the hope that it will be useful, |
| // but WITHOUT ANY WARRANTY; without even the implied warranty of |
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| // GNU General Public License for more details. |
| |
| // Under Section 7 of GPL version 3, you are granted additional |
| // permissions described in the GCC Runtime Library Exception, version |
| // 3.1, as published by the Free Software Foundation. |
| |
| // You should have received a copy of the GNU General Public License and |
| // a copy of the GCC Runtime Library Exception along with this program; |
| // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
| // <http://www.gnu.org/licenses/>. |
| |
| /** @file include/text_encoding |
| * This is a Standard C++ Library header. |
| */ |
| |
| #ifndef _GLIBCXX_TEXT_ENCODING |
| #define _GLIBCXX_TEXT_ENCODING |
| |
| #ifdef _GLIBCXX_SYSHDR |
| #pragma GCC system_header |
| #endif |
| |
| #include <bits/requires_hosted.h> |
| |
| #define __glibcxx_want_text_encoding |
| #include <bits/version.h> |
| |
| #ifdef __cpp_lib_text_encoding |
| #include <compare> |
| #include <string_view> |
| #include <bits/functional_hash.h> // hash |
| #include <bits/ranges_util.h> // view_interface |
| #include <bits/unicode.h> // __charset_alias_match |
| #include <ext/numeric_traits.h> // __int_traits |
| |
| namespace std _GLIBCXX_VISIBILITY(default) |
| { |
| _GLIBCXX_BEGIN_NAMESPACE_VERSION |
| |
| /** |
| * @brief An interface for accessing the IANA Character Sets registry. |
| * @ingroup locales |
| * @since C++26 |
| */ |
| struct text_encoding |
| { |
| private: |
| struct _Rep |
| { |
| using id = __INT_LEAST32_TYPE__; |
| id _M_id; |
| const char* _M_name; |
| |
| friend constexpr bool |
| operator<(const _Rep& __r, id __m) noexcept |
| { return __r._M_id < __m; } |
| |
| friend constexpr bool |
| operator==(const _Rep& __r, string_view __name) noexcept |
| { return __r._M_name == __name; } |
| }; |
| |
| public: |
| static constexpr size_t max_name_length = 63; |
| |
| enum class id : _Rep::id |
| { |
| other = 1, |
| unknown = 2, |
| ASCII = 3, |
| ISOLatin1 = 4, |
| ISOLatin2 = 5, |
| ISOLatin3 = 6, |
| ISOLatin4 = 7, |
| ISOLatinCyrillic = 8, |
| ISOLatinArabic = 9, |
| ISOLatinGreek = 10, |
| ISOLatinHebrew = 11, |
| ISOLatin5 = 12, |
| ISOLatin6 = 13, |
| ISOTextComm = 14, |
| HalfWidthKatakana = 15, |
| JISEncoding = 16, |
| ShiftJIS = 17, |
| EUCPkdFmtJapanese = 18, |
| EUCFixWidJapanese = 19, |
| ISO4UnitedKingdom = 20, |
| ISO11SwedishForNames = 21, |
| ISO15Italian = 22, |
| ISO17Spanish = 23, |
| ISO21German = 24, |
| ISO60DanishNorwegian = 25, |
| ISO69French = 26, |
| ISO10646UTF1 = 27, |
| ISO646basic1983 = 28, |
| INVARIANT = 29, |
| ISO2IntlRefVersion = 30, |
| NATSSEFI = 31, |
| NATSSEFIADD = 32, |
| ISO10Swedish = 35, |
| KSC56011987 = 36, |
| ISO2022KR = 37, |
| EUCKR = 38, |
| ISO2022JP = 39, |
| ISO2022JP2 = 40, |
| ISO13JISC6220jp = 41, |
| ISO14JISC6220ro = 42, |
| ISO16Portuguese = 43, |
| ISO18Greek7Old = 44, |
| ISO19LatinGreek = 45, |
| ISO25French = 46, |
| ISO27LatinGreek1 = 47, |
| ISO5427Cyrillic = 48, |
| ISO42JISC62261978 = 49, |
| ISO47BSViewdata = 50, |
| ISO49INIS = 51, |
| ISO50INIS8 = 52, |
| ISO51INISCyrillic = 53, |
| ISO54271981 = 54, |
| ISO5428Greek = 55, |
| ISO57GB1988 = 56, |
| ISO58GB231280 = 57, |
| ISO61Norwegian2 = 58, |
| ISO70VideotexSupp1 = 59, |
| ISO84Portuguese2 = 60, |
| ISO85Spanish2 = 61, |
| ISO86Hungarian = 62, |
| ISO87JISX0208 = 63, |
| ISO88Greek7 = 64, |
| ISO89ASMO449 = 65, |
| ISO90 = 66, |
| ISO91JISC62291984a = 67, |
| ISO92JISC62991984b = 68, |
| ISO93JIS62291984badd = 69, |
| ISO94JIS62291984hand = 70, |
| ISO95JIS62291984handadd = 71, |
| ISO96JISC62291984kana = 72, |
| ISO2033 = 73, |
| ISO99NAPLPS = 74, |
| ISO102T617bit = 75, |
| ISO103T618bit = 76, |
| ISO111ECMACyrillic = 77, |
| ISO121Canadian1 = 78, |
| ISO122Canadian2 = 79, |
| ISO123CSAZ24341985gr = 80, |
| ISO88596E = 81, |
| ISO88596I = 82, |
| ISO128T101G2 = 83, |
| ISO88598E = 84, |
| ISO88598I = 85, |
| ISO139CSN369103 = 86, |
| ISO141JUSIB1002 = 87, |
| ISO143IECP271 = 88, |
| ISO146Serbian = 89, |
| ISO147Macedonian = 90, |
| ISO150 = 91, |
| ISO151Cuba = 92, |
| ISO6937Add = 93, |
| ISO153GOST1976874 = 94, |
| ISO8859Supp = 95, |
| ISO10367Box = 96, |
| ISO158Lap = 97, |
| ISO159JISX02121990 = 98, |
| ISO646Danish = 99, |
| USDK = 100, |
| DKUS = 101, |
| KSC5636 = 102, |
| Unicode11UTF7 = 103, |
| ISO2022CN = 104, |
| ISO2022CNEXT = 105, |
| UTF8 = 106, |
| ISO885913 = 109, |
| ISO885914 = 110, |
| ISO885915 = 111, |
| ISO885916 = 112, |
| GBK = 113, |
| GB18030 = 114, |
| OSDEBCDICDF0415 = 115, |
| OSDEBCDICDF03IRV = 116, |
| OSDEBCDICDF041 = 117, |
| ISO115481 = 118, |
| KZ1048 = 119, |
| UCS2 = 1000, |
| UCS4 = 1001, |
| UnicodeASCII = 1002, |
| UnicodeLatin1 = 1003, |
| UnicodeJapanese = 1004, |
| UnicodeIBM1261 = 1005, |
| UnicodeIBM1268 = 1006, |
| UnicodeIBM1276 = 1007, |
| UnicodeIBM1264 = 1008, |
| UnicodeIBM1265 = 1009, |
| Unicode11 = 1010, |
| SCSU = 1011, |
| UTF7 = 1012, |
| UTF16BE = 1013, |
| UTF16LE = 1014, |
| UTF16 = 1015, |
| CESU8 = 1016, |
| UTF32 = 1017, |
| UTF32BE = 1018, |
| UTF32LE = 1019, |
| BOCU1 = 1020, |
| UTF7IMAP = 1021, |
| Windows30Latin1 = 2000, |
| Windows31Latin1 = 2001, |
| Windows31Latin2 = 2002, |
| Windows31Latin5 = 2003, |
| HPRoman8 = 2004, |
| AdobeStandardEncoding = 2005, |
| VenturaUS = 2006, |
| VenturaInternational = 2007, |
| DECMCS = 2008, |
| PC850Multilingual = 2009, |
| PC8DanishNorwegian = 2012, |
| PC862LatinHebrew = 2013, |
| PC8Turkish = 2014, |
| IBMSymbols = 2015, |
| IBMThai = 2016, |
| HPLegal = 2017, |
| HPPiFont = 2018, |
| HPMath8 = 2019, |
| HPPSMath = 2020, |
| HPDesktop = 2021, |
| VenturaMath = 2022, |
| MicrosoftPublishing = 2023, |
| Windows31J = 2024, |
| GB2312 = 2025, |
| Big5 = 2026, |
| Macintosh = 2027, |
| IBM037 = 2028, |
| IBM038 = 2029, |
| IBM273 = 2030, |
| IBM274 = 2031, |
| IBM275 = 2032, |
| IBM277 = 2033, |
| IBM278 = 2034, |
| IBM280 = 2035, |
| IBM281 = 2036, |
| IBM284 = 2037, |
| IBM285 = 2038, |
| IBM290 = 2039, |
| IBM297 = 2040, |
| IBM420 = 2041, |
| IBM423 = 2042, |
| IBM424 = 2043, |
| PC8CodePage437 = 2011, |
| IBM500 = 2044, |
| IBM851 = 2045, |
| PCp852 = 2010, |
| IBM855 = 2046, |
| IBM857 = 2047, |
| IBM860 = 2048, |
| IBM861 = 2049, |
| IBM863 = 2050, |
| IBM864 = 2051, |
| IBM865 = 2052, |
| IBM868 = 2053, |
| IBM869 = 2054, |
| IBM870 = 2055, |
| IBM871 = 2056, |
| IBM880 = 2057, |
| IBM891 = 2058, |
| IBM903 = 2059, |
| IBM904 = 2060, |
| IBM905 = 2061, |
| IBM918 = 2062, |
| IBM1026 = 2063, |
| IBMEBCDICATDE = 2064, |
| EBCDICATDEA = 2065, |
| EBCDICCAFR = 2066, |
| EBCDICDKNO = 2067, |
| EBCDICDKNOA = 2068, |
| EBCDICFISE = 2069, |
| EBCDICFISEA = 2070, |
| EBCDICFR = 2071, |
| EBCDICIT = 2072, |
| EBCDICPT = 2073, |
| EBCDICES = 2074, |
| EBCDICESA = 2075, |
| EBCDICESS = 2076, |
| EBCDICUK = 2077, |
| EBCDICUS = 2078, |
| Unknown8BiT = 2079, |
| Mnemonic = 2080, |
| Mnem = 2081, |
| VISCII = 2082, |
| VIQR = 2083, |
| KOI8R = 2084, |
| HZGB2312 = 2085, |
| IBM866 = 2086, |
| PC775Baltic = 2087, |
| KOI8U = 2088, |
| IBM00858 = 2089, |
| IBM00924 = 2090, |
| IBM01140 = 2091, |
| IBM01141 = 2092, |
| IBM01142 = 2093, |
| IBM01143 = 2094, |
| IBM01144 = 2095, |
| IBM01145 = 2096, |
| IBM01146 = 2097, |
| IBM01147 = 2098, |
| IBM01148 = 2099, |
| IBM01149 = 2100, |
| Big5HKSCS = 2101, |
| IBM1047 = 2102, |
| PTCP154 = 2103, |
| Amiga1251 = 2104, |
| KOI7switched = 2105, |
| BRF = 2106, |
| TSCII = 2107, |
| CP51932 = 2108, |
| windows874 = 2109, |
| windows1250 = 2250, |
| windows1251 = 2251, |
| windows1252 = 2252, |
| windows1253 = 2253, |
| windows1254 = 2254, |
| windows1255 = 2255, |
| windows1256 = 2256, |
| windows1257 = 2257, |
| windows1258 = 2258, |
| TIS620 = 2259, |
| CP50220 = 2260 |
| }; |
| using enum id; |
| |
| constexpr text_encoding() = default; |
| |
| constexpr explicit |
| text_encoding(string_view __enc) noexcept |
| : _M_rep(_S_find_name(__enc)) |
| { |
| __enc.copy(_M_name, max_name_length); |
| } |
| |
| // @pre i has the value of one of the enumerators of id. |
| constexpr |
| text_encoding(id __i) noexcept |
| : _M_rep(_S_find_id(__i)) |
| { |
| if (string_view __name(_M_rep->_M_name); !__name.empty()) |
| __name.copy(_M_name, max_name_length); |
| } |
| |
| constexpr id mib() const noexcept { return id(_M_rep->_M_id); } |
| |
| constexpr const char* name() const noexcept { return _M_name; } |
| |
| struct aliases_view : ranges::view_interface<aliases_view> |
| { |
| private: |
| class _Iterator; |
| struct _Sentinel { }; |
| |
| public: |
| constexpr _Iterator begin() const noexcept; |
| constexpr _Sentinel end() const noexcept { return {}; } |
| |
| private: |
| friend struct text_encoding; |
| |
| constexpr explicit aliases_view(const _Rep* __r) : _M_begin(__r) { } |
| |
| const _Rep* _M_begin = nullptr; |
| }; |
| |
| constexpr aliases_view |
| aliases() const noexcept |
| { |
| return _M_rep->_M_name[0] ? aliases_view(_M_rep) : aliases_view{nullptr}; |
| } |
| |
| friend constexpr bool |
| operator==(const text_encoding& __a, |
| const text_encoding& __b) noexcept |
| { |
| if (__a.mib() == id::other && __b.mib() == id::other) [[unlikely]] |
| return _S_comp(__a._M_name, __b._M_name); |
| else |
| return __a.mib() == __b.mib(); |
| } |
| |
| friend constexpr bool |
| operator==(const text_encoding& __encoding, id __i) noexcept |
| { return __encoding.mib() == __i; } |
| |
| #if __CHAR_BIT__ == 8 |
| static consteval text_encoding |
| literal() noexcept |
| { |
| #ifdef __GNUC_EXECUTION_CHARSET_NAME |
| return text_encoding(__GNUC_EXECUTION_CHARSET_NAME); |
| #elif defined __clang_literal_encoding__ |
| return text_encoding(__clang_literal_encoding__); |
| #else |
| return text_encoding(); |
| #endif |
| } |
| |
| static text_encoding |
| environment(); |
| |
| template<id _Id> |
| static bool |
| environment_is() |
| { return text_encoding(_Id)._M_is_environment(); } |
| #else |
| static text_encoding literal() = delete; |
| static text_encoding environment() = delete; |
| template<id> static bool environment_is() = delete; |
| #endif |
| |
| private: |
| const _Rep* _M_rep = _S_reps + 1; // id::unknown |
| char _M_name[max_name_length + 1] = {0}; |
| |
| bool |
| _M_is_environment() const; |
| |
| static inline constexpr _Rep _S_reps[] = { |
| { 1, "" }, { 2, "" }, |
| #define _GLIBCXX_GET_ENCODING_DATA |
| #include <bits/text_encoding-data.h> |
| #ifdef _GLIBCXX_GET_ENCODING_DATA |
| # error "Invalid text_encoding data" |
| #endif |
| { 9999, nullptr }, // sentinel |
| }; |
| |
| static constexpr bool |
| _S_comp(string_view __a, string_view __b) |
| { return __unicode::__charset_alias_match(__a, __b); } |
| |
| static constexpr const _Rep* |
| _S_find_name(string_view __name) noexcept |
| { |
| #ifdef _GLIBCXX_TEXT_ENCODING_UTF8_OFFSET |
| // Optimize the common UTF-8 case to avoid a linear search through all |
| // strings in the table using the _S_comp function. |
| if (__name == "UTF-8") |
| return _S_reps + 2 + _GLIBCXX_TEXT_ENCODING_UTF8_OFFSET; |
| #endif |
| |
| // The first two array elements (other and unknown) don't have names. |
| // The last element is a sentinel that can never match anything. |
| const auto __first = _S_reps + 2, __end = std::end(_S_reps) - 1; |
| for (auto __r = __first; __r != __end; ++__r) |
| if (_S_comp(__r->_M_name, __name)) |
| { |
| // Might have matched an alias. Find the first entry for this ID. |
| const auto __id = __r->_M_id; |
| while (__r[-1]._M_id == __id) |
| --__r; |
| return __r; |
| } |
| return _S_reps; // id::other |
| } |
| |
| static constexpr const _Rep* |
| _S_find_id(id __id) noexcept |
| { |
| const auto __i = (_Rep::id)__id; |
| const auto __r = std::lower_bound(_S_reps, std::end(_S_reps) - 1, __i); |
| if (__r->_M_id == __i) [[likely]] |
| return __r; |
| else |
| { |
| // Preconditions: i has the value of one of the enumerators of id. |
| __glibcxx_assert(__r->_M_id == __i); |
| return _S_reps + 1; // id::unknown |
| } |
| } |
| }; |
| |
| template<> |
| struct hash<text_encoding> |
| { |
| size_t |
| operator()(const text_encoding& __enc) const noexcept |
| { return std::hash<text_encoding::id>()(__enc.mib()); } |
| }; |
| |
| class text_encoding::aliases_view::_Iterator |
| { |
| public: |
| using value_type = const char*; |
| using reference = const char*; |
| using difference_type = int; |
| |
| constexpr _Iterator() = default; |
| |
| constexpr value_type |
| operator*() const |
| { |
| if (_M_dereferenceable()) [[likely]] |
| return _M_rep->_M_name; |
| else |
| { |
| __glibcxx_assert(_M_dereferenceable()); |
| return ""; |
| } |
| } |
| |
| constexpr _Iterator& |
| operator++() |
| { |
| if (_M_dereferenceable()) [[likely]] |
| ++_M_rep; |
| else |
| { |
| __glibcxx_assert(_M_dereferenceable()); |
| *this = _Iterator{}; |
| } |
| return *this; |
| } |
| |
| constexpr _Iterator& |
| operator--() |
| { |
| const bool __decrementable |
| = _M_rep != nullptr && _M_rep[-1]._M_id == _M_id; |
| if (__decrementable) [[likely]] |
| --_M_rep; |
| else |
| { |
| __glibcxx_assert(__decrementable); |
| *this = _Iterator{}; |
| } |
| return *this; |
| } |
| |
| constexpr _Iterator |
| operator++(int) |
| { |
| auto __it = *this; |
| ++*this; |
| return __it; |
| } |
| |
| constexpr _Iterator |
| operator--(int) |
| { |
| auto __it = *this; |
| --*this; |
| return __it; |
| } |
| |
| constexpr value_type |
| operator[](difference_type __n) const |
| { return *(*this + __n); } |
| |
| constexpr _Iterator& |
| operator+=(difference_type __n) |
| { |
| if (_M_rep != nullptr) |
| { |
| if (__n > 0) |
| { |
| if (__n < (std::end(_S_reps) - _M_rep) |
| && _M_rep[__n - 1]._M_id == _M_id) [[likely]] |
| _M_rep += __n; |
| else |
| *this = _Iterator{}; |
| } |
| else if (__n < 0) |
| { |
| if (__n > (_S_reps - _M_rep) |
| && _M_rep[__n]._M_id == _M_id) [[likely]] |
| _M_rep += __n; |
| else |
| *this = _Iterator{}; |
| } |
| } |
| if (__n != 0) |
| __glibcxx_assert(_M_rep != nullptr); |
| return *this; |
| } |
| |
| constexpr _Iterator& |
| operator-=(difference_type __n) |
| { |
| using _Traits = __gnu_cxx::__int_traits<difference_type>; |
| if (__n == _Traits::__min) [[unlikely]] |
| return operator+=(_Traits::__max); |
| return operator+=(-__n); |
| } |
| |
| constexpr difference_type |
| operator-(const _Iterator& __i) const |
| { |
| if (_M_id == __i._M_id) |
| return _M_rep - __i._M_rep; |
| __glibcxx_assert(_M_id == __i._M_id); |
| return __gnu_cxx::__int_traits<difference_type>::__max; |
| } |
| |
| constexpr bool |
| operator==(const _Iterator&) const = default; |
| |
| constexpr bool |
| operator==(_Sentinel) const noexcept |
| { return !_M_dereferenceable(); } |
| |
| constexpr strong_ordering |
| operator<=>(const _Iterator& __i) const |
| { |
| __glibcxx_assert(_M_id == __i._M_id); |
| return _M_rep <=> __i._M_rep; |
| } |
| |
| friend constexpr _Iterator |
| operator+(_Iterator __i, difference_type __n) |
| { |
| __i += __n; |
| return __i; |
| } |
| |
| friend constexpr _Iterator |
| operator+(difference_type __n, _Iterator __i) |
| { |
| __i += __n; |
| return __i; |
| } |
| |
| friend constexpr _Iterator |
| operator-(_Iterator __i, difference_type __n) |
| { |
| __i -= __n; |
| return __i; |
| } |
| |
| private: |
| friend struct text_encoding; |
| |
| constexpr explicit |
| _Iterator(const _Rep* __r) noexcept |
| : _M_rep(__r), _M_id(__r ? __r->_M_id : 0) |
| { } |
| |
| constexpr bool |
| _M_dereferenceable() const noexcept |
| { return _M_rep != nullptr && _M_rep->_M_id == _M_id; } |
| |
| const _Rep* _M_rep = nullptr; |
| _Rep::id _M_id = 0; |
| }; |
| |
| constexpr auto |
| text_encoding::aliases_view::begin() const noexcept |
| -> _Iterator |
| { return _Iterator(_M_begin); } |
| |
| namespace ranges |
| { |
| // Opt-in to borrowed_range concept |
| template<> |
| inline constexpr bool |
| enable_borrowed_range<std::text_encoding::aliases_view> = true; |
| } |
| |
| _GLIBCXX_END_NAMESPACE_VERSION |
| } // namespace std |
| |
| #endif // __cpp_lib_text_encoding |
| #endif // _GLIBCXX_TEXT_ENCODING |