|  | #!/usr/bin/env python3 | 
|  | # | 
|  | # Check gcc.pot file for stylistic issues as described in | 
|  | # https://gcc.gnu.org/onlinedocs/gccint/Guidelines-for-Diagnostics.html, | 
|  | # especially in gcc-internal-format messages. | 
|  | # | 
|  | # This file is part of GCC. | 
|  | # | 
|  | # GCC is free software; you can redistribute it and/or modify it under | 
|  | # the terms of the GNU General Public License as published by the Free | 
|  | # Software Foundation; either version 3, or (at your option) any later | 
|  | # version. | 
|  | # | 
|  | # GCC is distributed in the hope that it will be useful, but WITHOUT ANY | 
|  | # WARRANTY; without even the implied warranty of MERCHANTABILITY or | 
|  | # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License | 
|  | # for more details. | 
|  | # | 
|  | # You should have received a copy of the GNU General Public License | 
|  | # along with GCC; see the file COPYING3.  If not see | 
|  | # <http://www.gnu.org/licenses/>. | 
|  |  | 
|  | import argparse | 
|  | import re | 
|  | from collections import Counter | 
|  | from typing import Dict, Match | 
|  |  | 
|  | import polib | 
|  |  | 
# Number of reported warnings, keyed by the diagnostic text.  warn()
# increments it and main() prints it as the closing summary.
seen_warnings = Counter()
|  |  | 
|  |  | 
def location(msg: polib.POEntry):
    """
    Returns the first recorded occurrence of the message in the form
    "file:line", or the text "<unknown location>" when the message
    has no occurrences at all.
    """

    occurrences = msg.occurrences
    if not occurrences:
        return '<unknown location>'

    first = occurrences[0]
    path, line = first[0], first[1]
    return f'{path}:{line}'
|  |  | 
|  |  | 
def warn(msg: polib.POEntry,
         diagnostic_id: str, diagnostic: str, include_msgid=True):
    """
    Prints one diagnostic for the message and counts it in
    seen_warnings under the diagnostic text.

    The printed line is "<location>: <diagnostic>", followed by
    " in <repr of the msgid>" unless include_msgid is false.

    To suppress a warning for a particular message,
    add a line "#, gcclint:ignore:{diagnostic_id}" to the message.
    Suppressed warnings are neither printed nor counted.
    """

    suppress_flag = f'gcclint:ignore:{diagnostic_id}'
    if suppress_flag in msg.flags:
        return

    seen_warnings[diagnostic] += 1

    line = f'{location(msg)}: {diagnostic}'
    if include_msgid:
        line += f' in {repr(msg.msgid)}'
    print(line)
|  |  | 
|  |  | 
def lint_gcc_internal_format(msg: polib.POEntry):
    """
    Checks a single message that has the gcc-internal-format. These
    messages use a variety of placeholders like %qs, %<quotes%> and
    %q#E.

    Every check is a small nested function that reads msg and its
    msgid.  Findings are reported through warn(), so each of them can
    be suppressed per message with the matching gcclint:ignore flag.
    Nothing is returned.
    """

    msgid: str = msg.msgid

    def outside_quotes(m: Match[str]):
        """
        Tells whether the match starts outside of %<quotes%>, judged
        by counting the %< and %> that precede it in the msgid.
        """
        before = msgid[:m.start(0)]
        return before.count("%<") == before.count("%>")

    def lint_matching_placeholders():
        """
        Warns when literal values in placeholders are not exactly equal
        in the translation. This can happen when doing copy-and-paste
        translations of similar messages.

        To avoid these mismatches in the first place,
        structurally equal messages are found by
        lint_diagnostics_differing_only_in_placeholders.

        This check only applies when checking a finished translation
        such as de.po, not gcc.pot.
        """

        if not msg.translated():
            return

        in_msgid = re.findall('%<[^%]+%>', msgid)
        in_msgstr = re.findall('%<[^%]+%>', msg.msgstr)

        # Compared as sets: order and repetition of the quoted
        # literals may legitimately differ in a translation.
        if set(in_msgid) != set(in_msgstr):
            warn(msg,
                 'placeholder-mismatch',
                 f'placeholder mismatch: msgid has {in_msgid}, '
                 f'msgstr has {in_msgstr}',
                 include_msgid=False)

    def lint_option_outside_quotes():
        """
        Warns about whitespace-separated words outside of %<quotes%>
        that look like a command line option (a dash followed by a
        letter) or that start with __builtin_.
        """

        for match in re.finditer(r'\S+', msgid):
            part = match.group()
            if not outside_quotes(match):
                continue

            if part.startswith('-'):
                if len(part) >= 2 and part[1].isalpha():
                    # -INF is the one dash-plus-letter word that is
                    # deliberately not treated as an option.
                    if part == '-INF':
                        continue

                    warn(msg,
                         'option-outside-quotes',
                         'command line option outside %<quotes%>')

            if part.startswith('__builtin_'):
                warn(msg,
                     'builtin-outside-quotes',
                     'builtin function outside %<quotes%>')

    def lint_plain_apostrophe():
        """
        Warns about each apostrophe outside of %<quotes%> that is not
        directly preceded by a percent sign.
        """

        # The lookbehind keeps the match anchored on the apostrophe
        # itself.  Consuming the preceding character instead would
        # miss an apostrophe at the very start of the message, skip
        # the second of two adjacent apostrophes, and evaluate
        # outside_quotes() one character too early, which misjudges
        # an apostrophe that directly follows %< or %>.
        for match in re.finditer(r"(?<!%)'", msgid):
            if outside_quotes(match):
                warn(msg, 'apostrophe', 'apostrophe without leading %')

    def lint_space_before_quote():
        """
        A missing space before %< is often the result of string
        literals that are joined by the C compiler and neither literal
        has a space to separate the words.
        """

        for match in re.finditer("(.?[a-zA-Z0-9])%<", msgid):
            # A %< that directly follows the %s placeholder is not
            # reported; the "s" belongs to the placeholder, not to a
            # word.
            if match.group(1) != '%s':
                warn(msg,
                     'no-space-before-quote',
                     '%< directly following a letter or digit')

    def lint_underscore_outside_quotes():
        """
        An underscore outside of quotes is used in several contexts,
        and many of them violate the GCC Guidelines for Diagnostics:

        * names of GCC-internal compiler functions
        * names of GCC-internal data structures
        * static_cast and the like (which are legitimate)
        """

        for match in re.finditer("_", msgid):
            if outside_quotes(match):
                warn(msg,
                     'underscore-outside-quotes',
                     'underscore outside of %<quotes%>')
                # One report per message is enough.
                return

    def lint_may_not():
        """
        The term "may not" may either mean "it could be the case"
        or "should not". These two different meanings are sometimes
        hard to tell apart.
        """

        if re.search(r'\bmay not\b', msgid):
            warn(msg,
                 'ambiguous-may-not',
                 'the term "may not" is ambiguous')

    def lint_unbalanced_quotes():
        """
        Warns when the number of %< differs from the number of %>,
        in the msgid and, for translated messages, in the msgstr.
        Both findings use the same diagnostic text.
        """

        if msgid.count("%<") != msgid.count("%>"):
            warn(msg,
                 'unbalanced-quotes',
                 'unbalanced %< and %> quotes')

        if msg.translated():
            if msg.msgstr.count("%<") != msg.msgstr.count("%>"):
                warn(msg,
                     'unbalanced-quotes',
                     'unbalanced %< and %> quotes')

    def lint_single_space_after_sentence():
        """
        After a sentence there should be two spaces.
        """

        if re.search(r'[.] [A-Z]', msgid):
            warn(msg,
                 'single-space-after-sentence',
                 'single space after sentence')

    def lint_non_canonical_quotes():
        """
        Catches %<%s%>, which can be written in the shorter form %qs.
        The same goes for %s in plain ', " or `' quotes.
        """
        match = re.search("%<%s%>|'%s'|\"%s\"|`%s'", msgid)
        if match:
            warn(msg,
                 'non-canonical-quotes',
                 f'placeholder {match.group()} should be written as %qs')

    lint_option_outside_quotes()
    lint_plain_apostrophe()
    lint_space_before_quote()
    lint_underscore_outside_quotes()
    lint_may_not()
    lint_unbalanced_quotes()
    lint_matching_placeholders()
    lint_single_space_after_sentence()
    lint_non_canonical_quotes()
|  |  | 
|  |  | 
def lint_diagnostics_differing_only_in_placeholders(po: polib.POFile):
    """
    Finds messages that become identical once every %<literal%> is
    replaced by %qs. Such messages only differ in the plain strings
    inside %<quotes%> and could be merged into a single message,
    which spares the translators from copy-and-paste mistakes.

    The first message of each pattern is remembered; every later
    message with the same pattern is reported together with the
    location of that first message.

    See bug 90119.
    """

    first_with_pattern: Dict[str, polib.POEntry] = {}

    for entry in po:
        entry: polib.POEntry
        text = entry.msgid
        pattern = re.sub('%<[^%]+%>', '%qs', text)

        if pattern in first_with_pattern:
            earlier = first_with_pattern[pattern]
            warn(entry,
                 'same-pattern',
                 f'same pattern for {text!r} and '
                 f'{earlier.msgid!r} in {location(earlier)}',
                 include_msgid=False)
        else:
            # Remember the message under its pattern and under its
            # literal msgid, so that a later message whose pattern
            # equals this msgid is found as well.
            first_with_pattern[pattern] = entry
            first_with_pattern[text] = entry
|  |  | 
|  |  | 
def lint_file(po: polib.POFile):
    """
    Runs all checks on a parsed po/pot file: the per-message checks
    on every message flagged gcc-internal-format that is neither
    obsolete nor fuzzy, followed by the file-wide search for messages
    that differ only in their quoted literals.
    """

    for entry in po:
        entry: polib.POEntry

        if entry.obsolete or entry.fuzzy:
            continue

        if 'gcc-internal-format' in entry.flags:
            lint_gcc_internal_format(entry)

    lint_diagnostics_differing_only_in_placeholders(po)
|  |  | 
|  |  | 
def main():
    """
    Command line entry point: lints the file named on the command
    line and then prints a summary of every diagnostic text that was
    reported more than once, most frequent first.
    """

    cli = argparse.ArgumentParser(description='')
    cli.add_argument('file', help='pot file')
    options = cli.parse_args()

    lint_file(polib.pofile(options.file))

    print()
    print('summary:')
    for diagnostic, count in seen_warnings.most_common():
        if count > 1:
            print(f'{count}\t{diagnostic}')
|  |  | 
|  |  | 
# Run the linter only when this file is executed as a script.
if __name__ == '__main__':
    main()