| //===--- HeaderIncludes.cpp - Insert/Delete #includes --*- C++ -*----------===// | 
 | // | 
 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
 | // See https://llvm.org/LICENSE.txt for license information. | 
 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
 | // | 
 | //===----------------------------------------------------------------------===// | 
 |  | 
 | #include "clang/Tooling/Inclusions/HeaderIncludes.h" | 
 | #include "clang/Basic/FileManager.h" | 
 | #include "clang/Basic/SourceManager.h" | 
 | #include "clang/Lex/Lexer.h" | 
 | #include "llvm/Support/FormatVariadic.h" | 
 | #include "llvm/Support/Path.h" | 
 | #include <optional> | 
 |  | 
 | namespace clang { | 
 | namespace tooling { | 
 | namespace { | 
 |  | 
 | LangOptions createLangOpts() { | 
 |   LangOptions LangOpts; | 
 |   LangOpts.CPlusPlus = 1; | 
 |   LangOpts.CPlusPlus11 = 1; | 
 |   LangOpts.CPlusPlus14 = 1; | 
 |   LangOpts.LineComment = 1; | 
 |   LangOpts.CXXOperatorNames = 1; | 
 |   LangOpts.Bool = 1; | 
 |   LangOpts.ObjC = 1; | 
 |   LangOpts.MicrosoftExt = 1;    // To get kw___try, kw___finally. | 
 |   LangOpts.DeclSpecKeyword = 1; // To get __declspec. | 
 |   LangOpts.WChar = 1;           // To get wchar_t | 
 |   return LangOpts; | 
 | } | 
 |  | 
 | // Returns the offset after skipping a sequence of tokens, matched by \p | 
 | // GetOffsetAfterSequence, from the start of the code. | 
 | // \p GetOffsetAfterSequence should be a function that matches a sequence of | 
 | // tokens and returns an offset after the sequence. | 
 | unsigned getOffsetAfterTokenSequence( | 
 |     StringRef FileName, StringRef Code, const IncludeStyle &Style, | 
 |     llvm::function_ref<unsigned(const SourceManager &, Lexer &, Token &)> | 
 |         GetOffsetAfterSequence) { | 
 |   SourceManagerForFile VirtualSM(FileName, Code); | 
 |   SourceManager &SM = VirtualSM.get(); | 
 |   LangOptions LangOpts = createLangOpts(); | 
 |   Lexer Lex(SM.getMainFileID(), SM.getBufferOrFake(SM.getMainFileID()), SM, | 
 |             LangOpts); | 
 |   Token Tok; | 
 |   // Get the first token. | 
 |   Lex.LexFromRawLexer(Tok); | 
 |   return GetOffsetAfterSequence(SM, Lex, Tok); | 
 | } | 
 |  | 
 | // Check if a sequence of tokens is like "#<Name> <raw_identifier>". If it is, | 
 | // \p Tok will be the token after this directive; otherwise, it can be any token | 
 | // after the given \p Tok (including \p Tok). If \p RawIDName is provided, the | 
 | // (second) raw_identifier name is checked. | 
 | bool checkAndConsumeDirectiveWithName( | 
 |     Lexer &Lex, StringRef Name, Token &Tok, | 
 |     std::optional<StringRef> RawIDName = std::nullopt) { | 
 |   bool Matched = Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) && | 
 |                  Tok.is(tok::raw_identifier) && | 
 |                  Tok.getRawIdentifier() == Name && !Lex.LexFromRawLexer(Tok) && | 
 |                  Tok.is(tok::raw_identifier) && | 
 |                  (!RawIDName || Tok.getRawIdentifier() == *RawIDName); | 
 |   if (Matched) | 
 |     Lex.LexFromRawLexer(Tok); | 
 |   return Matched; | 
 | } | 
 |  | 
 | void skipComments(Lexer &Lex, Token &Tok) { | 
 |   while (Tok.is(tok::comment)) | 
 |     if (Lex.LexFromRawLexer(Tok)) | 
 |       return; | 
 | } | 
 |  | 
 | // Returns the offset after header guard directives and any comments | 
 | // before/after header guards (e.g. #ifndef/#define pair, #pragma once). If no | 
 | // header guard is present in the code, this will return the offset after | 
 | // skipping all comments from the start of the code. | 
 | unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName, | 
 |                                                StringRef Code, | 
 |                                                const IncludeStyle &Style) { | 
 |   // \p Consume returns location after header guard or 0 if no header guard is | 
 |   // found. | 
 |   auto ConsumeHeaderGuardAndComment = | 
 |       [&](std::function<unsigned(const SourceManager &SM, Lexer &Lex, | 
 |                                  Token Tok)> | 
 |               Consume) { | 
 |         return getOffsetAfterTokenSequence( | 
 |             FileName, Code, Style, | 
 |             [&Consume](const SourceManager &SM, Lexer &Lex, Token Tok) { | 
 |               skipComments(Lex, Tok); | 
 |               unsigned InitialOffset = SM.getFileOffset(Tok.getLocation()); | 
 |               return std::max(InitialOffset, Consume(SM, Lex, Tok)); | 
 |             }); | 
 |       }; | 
 |   return std::max( | 
 |       // #ifndef/#define | 
 |       ConsumeHeaderGuardAndComment( | 
 |           [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned { | 
 |             if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) { | 
 |               skipComments(Lex, Tok); | 
 |               if (checkAndConsumeDirectiveWithName(Lex, "define", Tok) && | 
 |                   Tok.isAtStartOfLine()) | 
 |                 return SM.getFileOffset(Tok.getLocation()); | 
 |             } | 
 |             return 0; | 
 |           }), | 
 |       // #pragma once | 
 |       ConsumeHeaderGuardAndComment( | 
 |           [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned { | 
 |             if (checkAndConsumeDirectiveWithName(Lex, "pragma", Tok, | 
 |                                                  StringRef("once"))) | 
 |               return SM.getFileOffset(Tok.getLocation()); | 
 |             return 0; | 
 |           })); | 
 | } | 
 |  | 
 | // Check if a sequence of tokens is like | 
 | //    "#include ("header.h" | <header.h>)". | 
 | // If it is, \p Tok will be the token after this directive; otherwise, it can be | 
 | // any token after the given \p Tok (including \p Tok). | 
 | bool checkAndConsumeInclusiveDirective(Lexer &Lex, Token &Tok) { | 
 |   auto Matched = [&]() { | 
 |     Lex.LexFromRawLexer(Tok); | 
 |     return true; | 
 |   }; | 
 |   if (Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) && | 
 |       Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == "include") { | 
 |     if (Lex.LexFromRawLexer(Tok)) | 
 |       return false; | 
 |     if (Tok.is(tok::string_literal)) | 
 |       return Matched(); | 
 |     if (Tok.is(tok::less)) { | 
 |       while (!Lex.LexFromRawLexer(Tok) && Tok.isNot(tok::greater)) { | 
 |       } | 
 |       if (Tok.is(tok::greater)) | 
 |         return Matched(); | 
 |     } | 
 |   } | 
 |   return false; | 
 | } | 
 |  | 
 | // Returns the offset of the last #include directive after which a new | 
 | // #include can be inserted. This ignores #include's after the #include block(s) | 
 | // in the beginning of a file to avoid inserting headers into code sections | 
 | // where new #include's should not be added by default. | 
 | // These code sections include: | 
 | //      - raw string literals (containing #include). | 
 | //      - #if blocks. | 
 | //      - Special #include's among declarations (e.g. functions). | 
 | // | 
 | // If no #include after which a new #include can be inserted, this returns the | 
 | // offset after skipping all comments from the start of the code. | 
 | // Inserting after an #include is not allowed if it comes after code that is not | 
 | // #include (e.g. pre-processing directive that is not #include, declarations). | 
 | unsigned getMaxHeaderInsertionOffset(StringRef FileName, StringRef Code, | 
 |                                      const IncludeStyle &Style) { | 
 |   return getOffsetAfterTokenSequence( | 
 |       FileName, Code, Style, | 
 |       [](const SourceManager &SM, Lexer &Lex, Token Tok) { | 
 |         skipComments(Lex, Tok); | 
 |         unsigned MaxOffset = SM.getFileOffset(Tok.getLocation()); | 
 |         while (checkAndConsumeInclusiveDirective(Lex, Tok)) | 
 |           MaxOffset = SM.getFileOffset(Tok.getLocation()); | 
 |         return MaxOffset; | 
 |       }); | 
 | } | 
 |  | 
 | inline StringRef trimInclude(StringRef IncludeName) { | 
 |   return IncludeName.trim("\"<>"); | 
 | } | 
 |  | 
 | const char IncludeRegexPattern[] = | 
 |     R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))"; | 
 |  | 
 | // The filename of Path excluding extension. | 
 | // Used to match implementation with headers, this differs from sys::path::stem: | 
 | //  - in names with multiple dots (foo.cu.cc) it terminates at the *first* | 
 | //  - an empty stem is never returned: /foo/.bar.x => .bar | 
 | //  - we don't bother to handle . and .. specially | 
 | StringRef matchingStem(llvm::StringRef Path) { | 
 |   StringRef Name = llvm::sys::path::filename(Path); | 
 |   return Name.substr(0, Name.find('.', 1)); | 
 | } | 
 |  | 
 | } // anonymous namespace | 
 |  | 
 | IncludeCategoryManager::IncludeCategoryManager(const IncludeStyle &Style, | 
 |                                                StringRef FileName) | 
 |     : Style(Style), FileName(FileName) { | 
 |   for (const auto &Category : Style.IncludeCategories) { | 
 |     CategoryRegexs.emplace_back(Category.Regex, Category.RegexIsCaseSensitive | 
 |                                                     ? llvm::Regex::NoFlags | 
 |                                                     : llvm::Regex::IgnoreCase); | 
 |   } | 
 |   IsMainFile = FileName.ends_with(".c") || FileName.ends_with(".cc") || | 
 |                FileName.ends_with(".cpp") || FileName.ends_with(".c++") || | 
 |                FileName.ends_with(".cxx") || FileName.ends_with(".m") || | 
 |                FileName.ends_with(".mm"); | 
 |   if (!Style.IncludeIsMainSourceRegex.empty()) { | 
 |     llvm::Regex MainFileRegex(Style.IncludeIsMainSourceRegex); | 
 |     IsMainFile |= MainFileRegex.match(FileName); | 
 |   } | 
 | } | 
 |  | 
 | int IncludeCategoryManager::getIncludePriority(StringRef IncludeName, | 
 |                                                bool CheckMainHeader) const { | 
 |   int Ret = INT_MAX; | 
 |   for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i) | 
 |     if (CategoryRegexs[i].match(IncludeName)) { | 
 |       Ret = Style.IncludeCategories[i].Priority; | 
 |       break; | 
 |     } | 
 |   if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName)) | 
 |     Ret = 0; | 
 |   return Ret; | 
 | } | 
 |  | 
 | int IncludeCategoryManager::getSortIncludePriority(StringRef IncludeName, | 
 |                                                    bool CheckMainHeader) const { | 
 |   int Ret = INT_MAX; | 
 |   for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i) | 
 |     if (CategoryRegexs[i].match(IncludeName)) { | 
 |       Ret = Style.IncludeCategories[i].SortPriority; | 
 |       if (Ret == 0) | 
 |         Ret = Style.IncludeCategories[i].Priority; | 
 |       break; | 
 |     } | 
 |   if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName)) | 
 |     Ret = 0; | 
 |   return Ret; | 
 | } | 
 | bool IncludeCategoryManager::isMainHeader(StringRef IncludeName) const { | 
 |   switch (Style.MainIncludeChar) { | 
 |   case IncludeStyle::MICD_Quote: | 
 |     if (!IncludeName.starts_with("\"")) | 
 |       return false; | 
 |     break; | 
 |   case IncludeStyle::MICD_AngleBracket: | 
 |     if (!IncludeName.starts_with("<")) | 
 |       return false; | 
 |     break; | 
 |   case IncludeStyle::MICD_Any: | 
 |     break; | 
 |   } | 
 |  | 
 |   IncludeName = | 
 |       IncludeName.drop_front(1).drop_back(1); // remove the surrounding "" or <> | 
 |   // Not matchingStem: implementation files may have compound extensions but | 
 |   // headers may not. | 
 |   StringRef HeaderStem = llvm::sys::path::stem(IncludeName); | 
 |   StringRef FileStem = llvm::sys::path::stem(FileName); // foo.cu for foo.cu.cc | 
 |   StringRef MatchingFileStem = matchingStem(FileName);  // foo for foo.cu.cc | 
 |   // main-header examples: | 
 |   //  1) foo.h => foo.cc | 
 |   //  2) foo.h => foo.cu.cc | 
 |   //  3) foo.proto.h => foo.proto.cc | 
 |   // | 
 |   // non-main-header examples: | 
 |   //  1) foo.h => bar.cc | 
 |   //  2) foo.proto.h => foo.cc | 
 |   StringRef Matching; | 
 |   if (MatchingFileStem.starts_with_insensitive(HeaderStem)) | 
 |     Matching = MatchingFileStem; // example 1), 2) | 
 |   else if (FileStem.equals_insensitive(HeaderStem)) | 
 |     Matching = FileStem; // example 3) | 
 |   if (!Matching.empty()) { | 
 |     llvm::Regex MainIncludeRegex(HeaderStem.str() + Style.IncludeIsMainRegex, | 
 |                                  llvm::Regex::IgnoreCase); | 
 |     if (MainIncludeRegex.match(Matching)) | 
 |       return true; | 
 |   } | 
 |   return false; | 
 | } | 
 |  | 
 | const llvm::Regex HeaderIncludes::IncludeRegex(IncludeRegexPattern); | 
 |  | 
 | HeaderIncludes::HeaderIncludes(StringRef FileName, StringRef Code, | 
 |                                const IncludeStyle &Style) | 
 |     : FileName(FileName), Code(Code), FirstIncludeOffset(-1), | 
 |       MinInsertOffset( | 
 |           getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style)), | 
 |       MaxInsertOffset(MinInsertOffset + | 
 |                       getMaxHeaderInsertionOffset( | 
 |                           FileName, Code.drop_front(MinInsertOffset), Style)), | 
 |       MainIncludeFound(false), | 
 |       Categories(Style, FileName) { | 
 |   // Add 0 for main header and INT_MAX for headers that are not in any | 
 |   // category. | 
 |   Priorities = {0, INT_MAX}; | 
 |   for (const auto &Category : Style.IncludeCategories) | 
 |     Priorities.insert(Category.Priority); | 
 |   SmallVector<StringRef, 32> Lines; | 
 |   Code.drop_front(MinInsertOffset).split(Lines, "\n"); | 
 |  | 
 |   unsigned Offset = MinInsertOffset; | 
 |   unsigned NextLineOffset; | 
 |   SmallVector<StringRef, 4> Matches; | 
 |   for (auto Line : Lines) { | 
 |     NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1); | 
 |     if (IncludeRegex.match(Line, &Matches)) { | 
 |       // If this is the last line without trailing newline, we need to make | 
 |       // sure we don't delete across the file boundary. | 
 |       addExistingInclude( | 
 |           Include(Matches[2], | 
 |                   tooling::Range( | 
 |                       Offset, std::min(Line.size() + 1, Code.size() - Offset)), | 
 |                   Matches[1] == "import" ? tooling::IncludeDirective::Import | 
 |                                          : tooling::IncludeDirective::Include), | 
 |           NextLineOffset); | 
 |     } | 
 |     Offset = NextLineOffset; | 
 |   } | 
 |  | 
 |   // Populate CategoryEndOfssets: | 
 |   // - Ensure that CategoryEndOffset[Highest] is always populated. | 
 |   // - If CategoryEndOffset[Priority] isn't set, use the next higher value | 
 |   // that is set, up to CategoryEndOffset[Highest]. | 
 |   auto Highest = Priorities.begin(); | 
 |   auto [It, Inserted] = CategoryEndOffsets.try_emplace(*Highest); | 
 |   if (Inserted) | 
 |     It->second = FirstIncludeOffset >= 0 ? FirstIncludeOffset : MinInsertOffset; | 
 |   // By this point, CategoryEndOffset[Highest] is always set appropriately: | 
 |   //  - to an appropriate location before/after existing #includes, or | 
 |   //  - to right after the header guard, or | 
 |   //  - to the beginning of the file. | 
 |   for (auto I = ++Priorities.begin(), E = Priorities.end(); I != E; ++I) | 
 |     if (CategoryEndOffsets.find(*I) == CategoryEndOffsets.end()) | 
 |       CategoryEndOffsets[*I] = CategoryEndOffsets[*std::prev(I)]; | 
 | } | 
 |  | 
 | // \p Offset: the start of the line following this include directive. | 
 | void HeaderIncludes::addExistingInclude(Include IncludeToAdd, | 
 |                                         unsigned NextLineOffset) { | 
 |   auto &Incs = ExistingIncludes[trimInclude(IncludeToAdd.Name)]; | 
 |   Incs.push_back(std::move(IncludeToAdd)); | 
 |   auto &CurInclude = Incs.back(); | 
 |   // The header name with quotes or angle brackets. | 
 |   // Only record the offset of current #include if we can insert after it. | 
 |   if (CurInclude.R.getOffset() <= MaxInsertOffset) { | 
 |     int Priority = Categories.getIncludePriority( | 
 |         CurInclude.Name, /*CheckMainHeader=*/!MainIncludeFound); | 
 |     if (Priority == 0) | 
 |       MainIncludeFound = true; | 
 |     CategoryEndOffsets[Priority] = NextLineOffset; | 
 |     IncludesByPriority[Priority].push_back(&CurInclude); | 
 |     if (FirstIncludeOffset < 0) | 
 |       FirstIncludeOffset = CurInclude.R.getOffset(); | 
 |   } | 
 | } | 
 |  | 
 | std::optional<tooling::Replacement> | 
 | HeaderIncludes::insert(llvm::StringRef IncludeName, bool IsAngled, | 
 |                        IncludeDirective Directive) const { | 
 |   assert(IncludeName == trimInclude(IncludeName)); | 
 |   // If a <header> ("header") already exists in code, "header" (<header>) with | 
 |   // different quotation and/or directive will still be inserted. | 
 |   // FIXME: figure out if this is the best behavior. | 
 |   auto It = ExistingIncludes.find(IncludeName); | 
 |   if (It != ExistingIncludes.end()) { | 
 |     for (const auto &Inc : It->second) | 
 |       if (Inc.Directive == Directive && | 
 |           ((IsAngled && StringRef(Inc.Name).starts_with("<")) || | 
 |            (!IsAngled && StringRef(Inc.Name).starts_with("\"")))) | 
 |         return std::nullopt; | 
 |   } | 
 |   std::string Quoted = | 
 |       std::string(llvm::formatv(IsAngled ? "<{0}>" : "\"{0}\"", IncludeName)); | 
 |   StringRef QuotedName = Quoted; | 
 |   int Priority = Categories.getIncludePriority( | 
 |       QuotedName, /*CheckMainHeader=*/!MainIncludeFound); | 
 |   auto CatOffset = CategoryEndOffsets.find(Priority); | 
 |   assert(CatOffset != CategoryEndOffsets.end()); | 
 |   unsigned InsertOffset = CatOffset->second; // Fall back offset | 
 |   auto Iter = IncludesByPriority.find(Priority); | 
 |   if (Iter != IncludesByPriority.end()) { | 
 |     for (const auto *Inc : Iter->second) { | 
 |       if (QuotedName < Inc->Name) { | 
 |         InsertOffset = Inc->R.getOffset(); | 
 |         break; | 
 |       } | 
 |     } | 
 |   } | 
 |   assert(InsertOffset <= Code.size()); | 
 |   llvm::StringRef DirectiveSpelling = | 
 |       Directive == IncludeDirective::Include ? "include" : "import"; | 
 |   std::string NewInclude = | 
 |       llvm::formatv("#{0} {1}\n", DirectiveSpelling, QuotedName); | 
 |   // When inserting headers at end of the code, also append '\n' to the code | 
 |   // if it does not end with '\n'. | 
 |   // FIXME: when inserting multiple #includes at the end of code, only one | 
 |   // newline should be added. | 
 |   if (InsertOffset == Code.size() && (!Code.empty() && Code.back() != '\n')) | 
 |     NewInclude = "\n" + NewInclude; | 
 |   return tooling::Replacement(FileName, InsertOffset, 0, NewInclude); | 
 | } | 
 |  | 
 | tooling::Replacements HeaderIncludes::remove(llvm::StringRef IncludeName, | 
 |                                              bool IsAngled) const { | 
 |   assert(IncludeName == trimInclude(IncludeName)); | 
 |   tooling::Replacements Result; | 
 |   auto Iter = ExistingIncludes.find(IncludeName); | 
 |   if (Iter == ExistingIncludes.end()) | 
 |     return Result; | 
 |   for (const auto &Inc : Iter->second) { | 
 |     if ((IsAngled && StringRef(Inc.Name).starts_with("\"")) || | 
 |         (!IsAngled && StringRef(Inc.Name).starts_with("<"))) | 
 |       continue; | 
 |     llvm::Error Err = Result.add(tooling::Replacement( | 
 |         FileName, Inc.R.getOffset(), Inc.R.getLength(), "")); | 
 |     if (Err) { | 
 |       auto ErrMsg = "Unexpected conflicts in #include deletions: " + | 
 |                     llvm::toString(std::move(Err)); | 
 |       llvm_unreachable(ErrMsg.c_str()); | 
 |     } | 
 |   } | 
 |   return Result; | 
 | } | 
 |  | 
 | } // namespace tooling | 
 | } // namespace clang |