| #!/usr/bin/env python3 |
| # |
| # Script to generate tables for libstdc++ std::text_encoding. |
| # |
| # This file is part of GCC. |
| # |
| # GCC is free software; you can redistribute it and/or modify it under |
| # the terms of the GNU General Public License as published by the Free |
| # Software Foundation; either version 3, or (at your option) any later |
| # version. |
| # |
| # GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
| # WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| # for more details. |
| # |
| # You should have received a copy of the GNU General Public License |
| # along with GCC; see the file COPYING3. If not see |
| # <http://www.gnu.org/licenses/>. |
| |
| # To update the Libstdc++ static data in <bits/text_encoding-data.h> download |
| # the latest: |
| # https://www.iana.org/assignments/character-sets/character-sets-1.csv |
| # Then run this script and save the output to |
| # include/bits/text_encoding-data.h |
| |
| import sys |
| import csv |
| import os |
| |
# Licence boilerplate and Doxygen @file comment written to the generated
# header immediately after the "Generated by" line.
_HEADER_BOILERPLATE = """

// Copyright The GNU Toolchain Authors.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.

/** @file bits/text_encoding-data.h
* This is an internal header file, included by other library headers.
* Do not attempt to use it directly. @headername{text_encoding}
*/
"""

# MIB values dropped from the table: "NATS-DANO" and "NATS-DANO-ADD" are
# removed as specified by the C++ standard.
_EXCLUDED_MIBS = (33, 34)

# This is not an official IANA alias, but we include it in the
# implementation-defined superset of aliases for US-ASCII.
# See also LWG 4043.
_EXTRA_ALIASES = {3: ["ASCII"]}


def _print_preamble():
    """Print the generated-file banner, licence text and include guard."""
    script = os.path.basename(__file__)
    print("// Generated by scripts/{}, do not edit.".format(script))
    print(_HEADER_BOILERPLATE)
    print("#ifndef _GLIBCXX_GET_ENCODING_DATA")
    print('# error "This is not a public header, do not include it directly"')
    print("#endif\n")


def _load_charsets(path):
    """Read the character-sets CSV at *path* into a dict.

    Returns a dict mapping each integer MIB value (column 2) to a list of
    names: the primary name (column 1) first, followed by the
    whitespace-separated aliases from column 5.

    Raises ValueError if two rows share the same MIB value (or if the MIB
    column is not an integer).
    """
    charsets = {}
    # The csv module asks for newline='' so that it can do its own newline
    # handling.  The encoding is given explicitly so that the result does
    # not depend on the locale the script happens to run under.
    with open(path, newline='', encoding='utf-8') as f:
        reader = csv.reader(f)
        next(reader)  # skip header row
        for row in reader:
            if not row:
                # csv.reader yields [] for a blank line (e.g. a trailing
                # empty line); there is nothing to record for it.
                continue
            mib = int(row[2])
            if mib in charsets:
                raise ValueError("Multiple rows for mibEnum={}".format(mib))
            name = row[1]
            aliases = row[5].split()
            # Ensure primary name comes first
            if name in aliases:
                aliases.remove(name)
            charsets[mib] = [name] + aliases
    return charsets


def _print_entries(charsets, extra_aliases):
    """Print the { mib, alias } initializers for every encoding.

    We need to generate a list of initializers of the form { mib, alias },
    e.g.,
      { 3, "US-ASCII" },
      { 3, "ISO646-US" },
      { 3, "csASCII" },
      { 4, "ISO_8859-1:1987" },
      { 4, "latin1" },
    The initializers must be sorted by the mib value. The first entry for
    a given mib must be the primary name for the encoding. Any aliases for
    the encoding come after the primary name.

    We also define a macro _GLIBCXX_TEXT_ENCODING_UTF8_OFFSET which is the
    offset into the list of the mib=106, alias="UTF-8" entry. This is used
    to optimize the common case, so we don't need to search for "UTF-8".

    *extra_aliases* maps a MIB value to additional names that are emitted
    after the IANA names for that MIB and marked as libstdc++ extensions.
    """
    count = 0  # number of initializers printed so far
    for mib in sorted(charsets):
        names = charsets[mib]
        if names[0] == "UTF-8":
            # The macro is printed just before the UTF-8 primary entry, so
            # `count` is exactly that entry's index in the list.
            print("#define _GLIBCXX_TEXT_ENCODING_UTF8_OFFSET {}".format(count))
        for name in names:
            print(' {{ {:4}, "{}" }},'.format(mib, name))
        count += len(names)
        extras = extra_aliases.get(mib, [])
        for name in extras:
            print(' {{ {:4}, "{}" }}, // libstdc++ extension'.format(mib, name))
        count += len(extras)


def main(argv=None):
    """Generate the text_encoding data table on standard output.

    *argv* is the full argument vector (program name followed by the path
    of the character-sets CSV); it defaults to sys.argv.

    Returns the process exit status: 1 after printing a usage message to
    stderr when the argument count is wrong, 0 on success.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) != 2:
        print("Usage: %s <character sets csv>" % argv[0], file=sys.stderr)
        return 1

    _print_preamble()

    charsets = _load_charsets(argv[1])
    for mib in _EXCLUDED_MIBS:
        charsets.pop(mib, None)

    _print_entries(charsets, _EXTRA_ALIASES)

    # <text_encoding> gives an error if this macro is left defined.
    # Do this last, so that the generated output is not usable unless we
    # reach here.
    print("\n#undef _GLIBCXX_GET_ENCODING_DATA")
    return 0


if __name__ == "__main__":
    sys.exit(main())