|  | //===--- IntegerLiteralSeparatorFixer.cpp -----------------------*- C++ -*-===// | 
|  | // | 
|  | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | // See https://llvm.org/LICENSE.txt for license information. | 
|  | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | /// | 
|  | /// \file | 
|  | /// This file implements IntegerLiteralSeparatorFixer that fixes integer literal | 
|  | /// separators in C++, Objective-C, C#, Java, and JavaScript code. | 
|  | /// | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #include "IntegerLiteralSeparatorFixer.h" | 
|  |  | 
|  | namespace clang { | 
|  | namespace format { | 
|  |  | 
|  | enum class Base { Binary, Decimal, Hex, Other }; | 
|  |  | 
|  | static Base getBase(const StringRef IntegerLiteral) { | 
|  | assert(IntegerLiteral.size() > 1); | 
|  |  | 
|  | if (IntegerLiteral[0] > '0') { | 
|  | assert(IntegerLiteral[0] <= '9'); | 
|  | return Base::Decimal; | 
|  | } | 
|  |  | 
|  | assert(IntegerLiteral[0] == '0'); | 
|  |  | 
|  | switch (IntegerLiteral[1]) { | 
|  | case 'b': | 
|  | case 'B': | 
|  | return Base::Binary; | 
|  | case 'x': | 
|  | case 'X': | 
|  | return Base::Hex; | 
|  | default: | 
|  | return Base::Other; | 
|  | } | 
|  | } | 
|  |  | 
|  | std::pair<tooling::Replacements, unsigned> | 
|  | IntegerLiteralSeparatorFixer::process(const Environment &Env, | 
|  | const FormatStyle &Style) { | 
|  | switch (Style.Language) { | 
|  | case FormatStyle::LK_Cpp: | 
|  | case FormatStyle::LK_ObjC: | 
|  | Separator = '\''; | 
|  | break; | 
|  | case FormatStyle::LK_CSharp: | 
|  | case FormatStyle::LK_Java: | 
|  | case FormatStyle::LK_JavaScript: | 
|  | Separator = '_'; | 
|  | break; | 
|  | default: | 
|  | return {}; | 
|  | } | 
|  |  | 
|  | const auto &Option = Style.IntegerLiteralSeparator; | 
|  | const auto Binary = Option.Binary; | 
|  | const auto Decimal = Option.Decimal; | 
|  | const auto Hex = Option.Hex; | 
|  | const bool SkipBinary = Binary == 0; | 
|  | const bool SkipDecimal = Decimal == 0; | 
|  | const bool SkipHex = Hex == 0; | 
|  |  | 
|  | if (SkipBinary && SkipDecimal && SkipHex) | 
|  | return {}; | 
|  |  | 
|  | const auto BinaryMinDigits = | 
|  | std::max((int)Option.BinaryMinDigits, Binary + 1); | 
|  | const auto DecimalMinDigits = | 
|  | std::max((int)Option.DecimalMinDigits, Decimal + 1); | 
|  | const auto HexMinDigits = std::max((int)Option.HexMinDigits, Hex + 1); | 
|  |  | 
|  | const auto &SourceMgr = Env.getSourceManager(); | 
|  | AffectedRangeManager AffectedRangeMgr(SourceMgr, Env.getCharRanges()); | 
|  |  | 
|  | const auto ID = Env.getFileID(); | 
|  | const auto LangOpts = getFormattingLangOpts(Style); | 
|  | Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts); | 
|  | Lex.SetCommentRetentionState(true); | 
|  |  | 
|  | Token Tok; | 
|  | tooling::Replacements Result; | 
|  |  | 
|  | for (bool Skip = false; !Lex.LexFromRawLexer(Tok);) { | 
|  | auto Length = Tok.getLength(); | 
|  | if (Length < 2) | 
|  | continue; | 
|  | auto Location = Tok.getLocation(); | 
|  | auto Text = StringRef(SourceMgr.getCharacterData(Location), Length); | 
|  | if (Tok.is(tok::comment)) { | 
|  | if (isClangFormatOff(Text)) | 
|  | Skip = true; | 
|  | else if (isClangFormatOn(Text)) | 
|  | Skip = false; | 
|  | continue; | 
|  | } | 
|  | if (Skip || Tok.isNot(tok::numeric_constant) || Text[0] == '.' || | 
|  | !AffectedRangeMgr.affectsCharSourceRange( | 
|  | CharSourceRange::getCharRange(Location, Tok.getEndLoc()))) { | 
|  | continue; | 
|  | } | 
|  | const auto B = getBase(Text); | 
|  | const bool IsBase2 = B == Base::Binary; | 
|  | const bool IsBase10 = B == Base::Decimal; | 
|  | const bool IsBase16 = B == Base::Hex; | 
|  | if ((IsBase2 && SkipBinary) || (IsBase10 && SkipDecimal) || | 
|  | (IsBase16 && SkipHex) || B == Base::Other) { | 
|  | continue; | 
|  | } | 
|  | if (Style.isCpp()) { | 
|  | // Hex alpha digits a-f/A-F must be at the end of the string literal. | 
|  | StringRef Suffixes = "_himnsuyd"; | 
|  | if (const auto Pos = | 
|  | Text.find_first_of(IsBase16 ? Suffixes.drop_back() : Suffixes); | 
|  | Pos != StringRef::npos) { | 
|  | Text = Text.substr(0, Pos); | 
|  | Length = Pos; | 
|  | } | 
|  | } | 
|  | if ((IsBase10 && Text.find_last_of(".eEfFdDmM") != StringRef::npos) || | 
|  | (IsBase16 && Text.find_last_of(".pP") != StringRef::npos)) { | 
|  | continue; | 
|  | } | 
|  | const auto Start = Text[0] == '0' ? 2 : 0; | 
|  | auto End = Text.find_first_of("uUlLzZn", Start); | 
|  | if (End == StringRef::npos) | 
|  | End = Length; | 
|  | if (Start > 0 || End < Length) { | 
|  | Length = End - Start; | 
|  | Text = Text.substr(Start, Length); | 
|  | } | 
|  | auto DigitsPerGroup = Decimal; | 
|  | auto MinDigits = DecimalMinDigits; | 
|  | if (IsBase2) { | 
|  | DigitsPerGroup = Binary; | 
|  | MinDigits = BinaryMinDigits; | 
|  | } else if (IsBase16) { | 
|  | DigitsPerGroup = Hex; | 
|  | MinDigits = HexMinDigits; | 
|  | } | 
|  | const auto SeparatorCount = Text.count(Separator); | 
|  | const int DigitCount = Length - SeparatorCount; | 
|  | const bool RemoveSeparator = DigitsPerGroup < 0 || DigitCount < MinDigits; | 
|  | if (RemoveSeparator && SeparatorCount == 0) | 
|  | continue; | 
|  | if (!RemoveSeparator && SeparatorCount > 0 && | 
|  | checkSeparator(Text, DigitsPerGroup)) { | 
|  | continue; | 
|  | } | 
|  | const auto &Formatted = | 
|  | format(Text, DigitsPerGroup, DigitCount, RemoveSeparator); | 
|  | assert(Formatted != Text); | 
|  | if (Start > 0) | 
|  | Location = Location.getLocWithOffset(Start); | 
|  | cantFail(Result.add( | 
|  | tooling::Replacement(SourceMgr, Location, Length, Formatted))); | 
|  | } | 
|  |  | 
|  | return {Result, 0}; | 
|  | } | 
|  |  | 
|  | bool IntegerLiteralSeparatorFixer::checkSeparator( | 
|  | const StringRef IntegerLiteral, int DigitsPerGroup) const { | 
|  | assert(DigitsPerGroup > 0); | 
|  |  | 
|  | int I = 0; | 
|  | for (auto C : llvm::reverse(IntegerLiteral)) { | 
|  | if (C == Separator) { | 
|  | if (I < DigitsPerGroup) | 
|  | return false; | 
|  | I = 0; | 
|  | } else { | 
|  | if (I == DigitsPerGroup) | 
|  | return false; | 
|  | ++I; | 
|  | } | 
|  | } | 
|  |  | 
|  | return true; | 
|  | } | 
|  |  | 
|  | std::string IntegerLiteralSeparatorFixer::format(const StringRef IntegerLiteral, | 
|  | int DigitsPerGroup, | 
|  | int DigitCount, | 
|  | bool RemoveSeparator) const { | 
|  | assert(DigitsPerGroup != 0); | 
|  |  | 
|  | std::string Formatted; | 
|  |  | 
|  | if (RemoveSeparator) { | 
|  | for (auto C : IntegerLiteral) | 
|  | if (C != Separator) | 
|  | Formatted.push_back(C); | 
|  | return Formatted; | 
|  | } | 
|  |  | 
|  | int Remainder = DigitCount % DigitsPerGroup; | 
|  |  | 
|  | int I = 0; | 
|  | for (auto C : IntegerLiteral) { | 
|  | if (C == Separator) | 
|  | continue; | 
|  | if (I == (Remainder > 0 ? Remainder : DigitsPerGroup)) { | 
|  | Formatted.push_back(Separator); | 
|  | I = 0; | 
|  | Remainder = 0; | 
|  | } | 
|  | Formatted.push_back(C); | 
|  | ++I; | 
|  | } | 
|  |  | 
|  | return Formatted; | 
|  | } | 
|  |  | 
|  | } // namespace format | 
|  | } // namespace clang |