clang/lib/Tooling/Inclusions/HeaderIncludes.cpp - rust-lang/llvm-project - Git at Google

 //===--- HeaderIncludes.cpp - Insert/Delete #includes --*- C++ -*----------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//

 #include "clang/Tooling/Inclusions/HeaderIncludes.h"
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Lex/Lexer.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/Path.h"
 #include <optional>

 namespace clang {
 namespace tooling {
 namespace {

 LangOptions createLangOpts() {
   LangOptions LangOpts;
   LangOpts.CPlusPlus = 1;
   LangOpts.CPlusPlus11 = 1;
   LangOpts.CPlusPlus14 = 1;
   LangOpts.LineComment = 1;
   LangOpts.CXXOperatorNames = 1;
   LangOpts.Bool = 1;
   LangOpts.ObjC = 1;
   LangOpts.MicrosoftExt = 1;    // To get kw___try, kw___finally.
   LangOpts.DeclSpecKeyword = 1; // To get __declspec.
   LangOpts.WChar = 1;           // To get wchar_t
   return LangOpts;
 }

 // Returns the offset after skipping a sequence of tokens, matched by \p
 // GetOffsetAfterSequence, from the start of the code.
 // \p GetOffsetAfterSequence should be a function that matches a sequence of
 // tokens and returns an offset after the sequence.
 unsigned getOffsetAfterTokenSequence(
     StringRef FileName, StringRef Code, const IncludeStyle &Style,
     llvm::function_ref<unsigned(const SourceManager &, Lexer &, Token &)>
         GetOffsetAfterSequence) {
   SourceManagerForFile VirtualSM(FileName, Code);
   SourceManager &SM = VirtualSM.get();
   LangOptions LangOpts = createLangOpts();
   Lexer Lex(SM.getMainFileID(), SM.getBufferOrFake(SM.getMainFileID()), SM,
             LangOpts);
   Token Tok;
   // Get the first token.
   Lex.LexFromRawLexer(Tok);
   return GetOffsetAfterSequence(SM, Lex, Tok);
 }

 // Check if a sequence of tokens is like "#<Name> <raw_identifier>". If it is,
 // \p Tok will be the token after this directive; otherwise, it can be any token
 // after the given \p Tok (including \p Tok). If \p RawIDName is provided, the
 // (second) raw_identifier name is checked.
 bool checkAndConsumeDirectiveWithName(
     Lexer &Lex, StringRef Name, Token &Tok,
     std::optional<StringRef> RawIDName = std::nullopt) {
   bool Matched = Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) &&
                  Tok.is(tok::raw_identifier) &&
                  Tok.getRawIdentifier() == Name && !Lex.LexFromRawLexer(Tok) &&
                  Tok.is(tok::raw_identifier) &&
                  (!RawIDName || Tok.getRawIdentifier() == *RawIDName);
   if (Matched)
     Lex.LexFromRawLexer(Tok);
   return Matched;
 }

 void skipComments(Lexer &Lex, Token &Tok) {
   while (Tok.is(tok::comment))
     if (Lex.LexFromRawLexer(Tok))
       return;
 }

 // Returns the offset after header guard directives and any comments
 // before/after header guards (e.g. #ifndef/#define pair, #pragma once). If no
 // header guard is present in the code, this will return the offset after
 // skipping all comments from the start of the code.
 unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName,
                                                StringRef Code,
                                                const IncludeStyle &Style) {
   // \p Consume returns location after header guard or 0 if no header guard is
   // found.
   auto ConsumeHeaderGuardAndComment =
       [&](std::function<unsigned(const SourceManager &SM, Lexer &Lex,
                                  Token Tok)>
               Consume) {
         return getOffsetAfterTokenSequence(
             FileName, Code, Style,
             [&Consume](const SourceManager &SM, Lexer &Lex, Token Tok) {
               skipComments(Lex, Tok);
               unsigned InitialOffset = SM.getFileOffset(Tok.getLocation());
               return std::max(InitialOffset, Consume(SM, Lex, Tok));
             });
       };
   return std::max(
       // #ifndef/#define
       ConsumeHeaderGuardAndComment(
           [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
             if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) {
               skipComments(Lex, Tok);
               if (checkAndConsumeDirectiveWithName(Lex, "define", Tok) &&
                   Tok.isAtStartOfLine())
                 return SM.getFileOffset(Tok.getLocation());
             }
             return 0;
           }),
       // #pragma once
       ConsumeHeaderGuardAndComment(
           [](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
             if (checkAndConsumeDirectiveWithName(Lex, "pragma", Tok,
                                                  StringRef("once")))
               return SM.getFileOffset(Tok.getLocation());
             return 0;
           }));
 }

 // Check if a sequence of tokens is like
 //    "#include ("header.h" | <header.h>)".
 // If it is, \p Tok will be the token after this directive; otherwise, it can be
 // any token after the given \p Tok (including \p Tok).
 bool checkAndConsumeInclusiveDirective(Lexer &Lex, Token &Tok) {
   auto Matched = [&]() {
     Lex.LexFromRawLexer(Tok);
     return true;
   };
   if (Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) &&
       Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == "include") {
     if (Lex.LexFromRawLexer(Tok))
       return false;
     if (Tok.is(tok::string_literal))
       return Matched();
     if (Tok.is(tok::less)) {
       while (!Lex.LexFromRawLexer(Tok) && Tok.isNot(tok::greater)) {
       }
       if (Tok.is(tok::greater))
         return Matched();
     }
   }
   return false;
 }

 // Returns the offset of the last #include directive after which a new
 // #include can be inserted. This ignores #include's after the #include block(s)
 // in the beginning of a file to avoid inserting headers into code sections
 // where new #include's should not be added by default.
 // These code sections include:
 //      - raw string literals (containing #include).
 //      - #if blocks.
 //      - Special #include's among declarations (e.g. functions).
 //
 // If no #include after which a new #include can be inserted, this returns the
 // offset after skipping all comments from the start of the code.
 // Inserting after an #include is not allowed if it comes after code that is not
 // #include (e.g. pre-processing directive that is not #include, declarations).
 unsigned getMaxHeaderInsertionOffset(StringRef FileName, StringRef Code,
                                      const IncludeStyle &Style) {
   return getOffsetAfterTokenSequence(
       FileName, Code, Style,
       [](const SourceManager &SM, Lexer &Lex, Token Tok) {
         skipComments(Lex, Tok);
         unsigned MaxOffset = SM.getFileOffset(Tok.getLocation());
         while (checkAndConsumeInclusiveDirective(Lex, Tok))
           MaxOffset = SM.getFileOffset(Tok.getLocation());
         return MaxOffset;
       });
 }

 inline StringRef trimInclude(StringRef IncludeName) {
   return IncludeName.trim("\"<>");
 }

 const char IncludeRegexPattern[] =
     R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))";

 // The filename of Path excluding extension.
 // Used to match implementation with headers, this differs from sys::path::stem:
 //  - in names with multiple dots (foo.cu.cc) it terminates at the *first*
 //  - an empty stem is never returned: /foo/.bar.x => .bar
 //  - we don't bother to handle . and .. specially
 StringRef matchingStem(llvm::StringRef Path) {
   StringRef Name = llvm::sys::path::filename(Path);
   return Name.substr(0, Name.find('.', 1));
 }

 } // anonymous namespace

 IncludeCategoryManager::IncludeCategoryManager(const IncludeStyle &Style,
                                                StringRef FileName)
     : Style(Style), FileName(FileName) {
   for (const auto &Category : Style.IncludeCategories) {
     CategoryRegexs.emplace_back(Category.Regex, Category.RegexIsCaseSensitive
                                                     ? llvm::Regex::NoFlags
                                                     : llvm::Regex::IgnoreCase);
   }
   IsMainFile = FileName.ends_with(".c") || FileName.ends_with(".cc") ||
                FileName.ends_with(".cpp") || FileName.ends_with(".c++") ||
                FileName.ends_with(".cxx") || FileName.ends_with(".m") ||
                FileName.ends_with(".mm");
   if (!Style.IncludeIsMainSourceRegex.empty()) {
     llvm::Regex MainFileRegex(Style.IncludeIsMainSourceRegex);
     IsMainFile |= MainFileRegex.match(FileName);
   }
 }

 int IncludeCategoryManager::getIncludePriority(StringRef IncludeName,
                                                bool CheckMainHeader) const {
   int Ret = INT_MAX;
   for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i)
     if (CategoryRegexs[i].match(IncludeName)) {
       Ret = Style.IncludeCategories[i].Priority;
       break;
     }
   if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName))
     Ret = 0;
   return Ret;
 }

 int IncludeCategoryManager::getSortIncludePriority(StringRef IncludeName,
                                                    bool CheckMainHeader) const {
   int Ret = INT_MAX;
   for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i)
     if (CategoryRegexs[i].match(IncludeName)) {
       Ret = Style.IncludeCategories[i].SortPriority;
       if (Ret == 0)
         Ret = Style.IncludeCategories[i].Priority;
       break;
     }
   if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName))
     Ret = 0;
   return Ret;
 }
 bool IncludeCategoryManager::isMainHeader(StringRef IncludeName) const {
   switch (Style.MainIncludeChar) {
   case IncludeStyle::MICD_Quote:
     if (!IncludeName.starts_with("\""))
       return false;
     break;
   case IncludeStyle::MICD_AngleBracket:
     if (!IncludeName.starts_with("<"))
       return false;
     break;
   case IncludeStyle::MICD_Any:
     break;
   }

   IncludeName =
       IncludeName.drop_front(1).drop_back(1); // remove the surrounding "" or <>
   // Not matchingStem: implementation files may have compound extensions but
   // headers may not.
   StringRef HeaderStem = llvm::sys::path::stem(IncludeName);
   StringRef FileStem = llvm::sys::path::stem(FileName); // foo.cu for foo.cu.cc
   StringRef MatchingFileStem = matchingStem(FileName);  // foo for foo.cu.cc
   // main-header examples:
   //  1) foo.h => foo.cc
   //  2) foo.h => foo.cu.cc
   //  3) foo.proto.h => foo.proto.cc
   //
   // non-main-header examples:
   //  1) foo.h => bar.cc
   //  2) foo.proto.h => foo.cc
   StringRef Matching;
   if (MatchingFileStem.starts_with_insensitive(HeaderStem))
     Matching = MatchingFileStem; // example 1), 2)
   else if (FileStem.equals_insensitive(HeaderStem))
     Matching = FileStem; // example 3)
   if (!Matching.empty()) {
     llvm::Regex MainIncludeRegex(HeaderStem.str() + Style.IncludeIsMainRegex,
                                  llvm::Regex::IgnoreCase);
     if (MainIncludeRegex.match(Matching))
       return true;
   }
   return false;
 }

 const llvm::Regex HeaderIncludes::IncludeRegex(IncludeRegexPattern);

 HeaderIncludes::HeaderIncludes(StringRef FileName, StringRef Code,
                                const IncludeStyle &Style)
     : FileName(FileName), Code(Code), FirstIncludeOffset(-1),
       MinInsertOffset(
           getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style)),
       MaxInsertOffset(MinInsertOffset +
                       getMaxHeaderInsertionOffset(
                           FileName, Code.drop_front(MinInsertOffset), Style)),
       MainIncludeFound(false),
       Categories(Style, FileName) {
   // Add 0 for main header and INT_MAX for headers that are not in any
   // category.
   Priorities = {0, INT_MAX};
   for (const auto &Category : Style.IncludeCategories)
     Priorities.insert(Category.Priority);
   SmallVector<StringRef, 32> Lines;
   Code.drop_front(MinInsertOffset).split(Lines, "\n");

   unsigned Offset = MinInsertOffset;
   unsigned NextLineOffset;
   SmallVector<StringRef, 4> Matches;
   for (auto Line : Lines) {
     NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1);
     if (IncludeRegex.match(Line, &Matches)) {
       // If this is the last line without trailing newline, we need to make
       // sure we don't delete across the file boundary.
       addExistingInclude(
           Include(Matches[2],
                   tooling::Range(
                       Offset, std::min(Line.size() + 1, Code.size() - Offset)),
                   Matches[1] == "import" ? tooling::IncludeDirective::Import
                                          : tooling::IncludeDirective::Include),
           NextLineOffset);
     }
     Offset = NextLineOffset;
   }

   // Populate CategoryEndOfssets:
   // - Ensure that CategoryEndOffset[Highest] is always populated.
   // - If CategoryEndOffset[Priority] isn't set, use the next higher value
   // that is set, up to CategoryEndOffset[Highest].
   auto Highest = Priorities.begin();
   auto [It, Inserted] = CategoryEndOffsets.try_emplace(*Highest);
   if (Inserted)
     It->second = FirstIncludeOffset >= 0 ? FirstIncludeOffset : MinInsertOffset;
   // By this point, CategoryEndOffset[Highest] is always set appropriately:
   //  - to an appropriate location before/after existing #includes, or
   //  - to right after the header guard, or
   //  - to the beginning of the file.
   for (auto I = ++Priorities.begin(), E = Priorities.end(); I != E; ++I)
     if (CategoryEndOffsets.find(*I) == CategoryEndOffsets.end())
       CategoryEndOffsets[*I] = CategoryEndOffsets[*std::prev(I)];
 }

 // \p Offset: the start of the line following this include directive.
 void HeaderIncludes::addExistingInclude(Include IncludeToAdd,
                                         unsigned NextLineOffset) {
   auto &Incs = ExistingIncludes[trimInclude(IncludeToAdd.Name)];
   Incs.push_back(std::move(IncludeToAdd));
   auto &CurInclude = Incs.back();
   // The header name with quotes or angle brackets.
   // Only record the offset of current #include if we can insert after it.
   if (CurInclude.R.getOffset() <= MaxInsertOffset) {
     int Priority = Categories.getIncludePriority(
         CurInclude.Name, /*CheckMainHeader=*/!MainIncludeFound);
     if (Priority == 0)
       MainIncludeFound = true;
     CategoryEndOffsets[Priority] = NextLineOffset;
     IncludesByPriority[Priority].push_back(&CurInclude);
     if (FirstIncludeOffset < 0)
       FirstIncludeOffset = CurInclude.R.getOffset();
   }
 }

 std::optional<tooling::Replacement>
 HeaderIncludes::insert(llvm::StringRef IncludeName, bool IsAngled,
                        IncludeDirective Directive) const {
   assert(IncludeName == trimInclude(IncludeName));
   // If a <header> ("header") already exists in code, "header" (<header>) with
   // different quotation and/or directive will still be inserted.
   // FIXME: figure out if this is the best behavior.
   auto It = ExistingIncludes.find(IncludeName);
   if (It != ExistingIncludes.end()) {
     for (const auto &Inc : It->second)
       if (Inc.Directive == Directive &&
           ((IsAngled && StringRef(Inc.Name).starts_with("<")) ||
            (!IsAngled && StringRef(Inc.Name).starts_with("\""))))
         return std::nullopt;
   }
   std::string Quoted =
       std::string(llvm::formatv(IsAngled ? "<{0}>" : "\"{0}\"", IncludeName));
   StringRef QuotedName = Quoted;
   int Priority = Categories.getIncludePriority(
       QuotedName, /*CheckMainHeader=*/!MainIncludeFound);
   auto CatOffset = CategoryEndOffsets.find(Priority);
   assert(CatOffset != CategoryEndOffsets.end());
   unsigned InsertOffset = CatOffset->second; // Fall back offset
   auto Iter = IncludesByPriority.find(Priority);
   if (Iter != IncludesByPriority.end()) {
     for (const auto *Inc : Iter->second) {
       if (QuotedName < Inc->Name) {
         InsertOffset = Inc->R.getOffset();
         break;
       }
     }
   }
   assert(InsertOffset <= Code.size());
   llvm::StringRef DirectiveSpelling =
       Directive == IncludeDirective::Include ? "include" : "import";
   std::string NewInclude =
       llvm::formatv("#{0} {1}\n", DirectiveSpelling, QuotedName);
   // When inserting headers at end of the code, also append '\n' to the code
   // if it does not end with '\n'.
   // FIXME: when inserting multiple #includes at the end of code, only one
   // newline should be added.
   if (InsertOffset == Code.size() && (!Code.empty() && Code.back() != '\n'))
     NewInclude = "\n" + NewInclude;
   return tooling::Replacement(FileName, InsertOffset, 0, NewInclude);
 }

 tooling::Replacements HeaderIncludes::remove(llvm::StringRef IncludeName,
                                              bool IsAngled) const {
   assert(IncludeName == trimInclude(IncludeName));
   tooling::Replacements Result;
   auto Iter = ExistingIncludes.find(IncludeName);
   if (Iter == ExistingIncludes.end())
     return Result;
   for (const auto &Inc : Iter->second) {
     if ((IsAngled && StringRef(Inc.Name).starts_with("\"")) ||
         (!IsAngled && StringRef(Inc.Name).starts_with("<")))
       continue;
     llvm::Error Err = Result.add(tooling::Replacement(
         FileName, Inc.R.getOffset(), Inc.R.getLength(), ""));
     if (Err) {
       auto ErrMsg = "Unexpected conflicts in #include deletions: " +
                     llvm::toString(std::move(Err));
       llvm_unreachable(ErrMsg.c_str());
     }
   }
   return Result;
 }

 } // namespace tooling
 } // namespace clang
	//===--- HeaderIncludes.cpp - Insert/Delete #includes --- C++ -----------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//

	#include "clang/Tooling/Inclusions/HeaderIncludes.h"
	#include "clang/Basic/FileManager.h"
	#include "clang/Basic/SourceManager.h"
	#include "clang/Lex/Lexer.h"
	#include "llvm/Support/FormatVariadic.h"
	#include "llvm/Support/Path.h"
	#include <optional>

	namespace clang {
	namespace tooling {
	namespace {

	LangOptions createLangOpts() {
	LangOptions LangOpts;
	LangOpts.CPlusPlus = 1;
	LangOpts.CPlusPlus11 = 1;
	LangOpts.CPlusPlus14 = 1;
	LangOpts.LineComment = 1;
	LangOpts.CXXOperatorNames = 1;
	LangOpts.Bool = 1;
	LangOpts.ObjC = 1;
	LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally.
	LangOpts.DeclSpecKeyword = 1; // To get __declspec.
	LangOpts.WChar = 1; // To get wchar_t
	return LangOpts;
	}

	// Returns the offset after skipping a sequence of tokens, matched by \p
	// GetOffsetAfterSequence, from the start of the code.
	// \p GetOffsetAfterSequence should be a function that matches a sequence of
	// tokens and returns an offset after the sequence.
	unsigned getOffsetAfterTokenSequence(
	StringRef FileName, StringRef Code, const IncludeStyle &Style,
	llvm::function_ref<unsigned(const SourceManager &, Lexer &, Token &)>
	GetOffsetAfterSequence) {
	SourceManagerForFile VirtualSM(FileName, Code);
	SourceManager &SM = VirtualSM.get();
	LangOptions LangOpts = createLangOpts();
	Lexer Lex(SM.getMainFileID(), SM.getBufferOrFake(SM.getMainFileID()), SM,
	LangOpts);
	Token Tok;
	// Get the first token.
	Lex.LexFromRawLexer(Tok);
	return GetOffsetAfterSequence(SM, Lex, Tok);
	}

	// Check if a sequence of tokens is like "#<Name> <raw_identifier>". If it is,
	// \p Tok will be the token after this directive; otherwise, it can be any token
	// after the given \p Tok (including \p Tok). If \p RawIDName is provided, the
	// (second) raw_identifier name is checked.
	bool checkAndConsumeDirectiveWithName(
	Lexer &Lex, StringRef Name, Token &Tok,
	std::optional<StringRef> RawIDName = std::nullopt) {
	bool Matched = Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) &&
	Tok.is(tok::raw_identifier) &&
	Tok.getRawIdentifier() == Name && !Lex.LexFromRawLexer(Tok) &&
	Tok.is(tok::raw_identifier) &&
	(!RawIDName \|\| Tok.getRawIdentifier() == *RawIDName);
	if (Matched)
	Lex.LexFromRawLexer(Tok);
	return Matched;
	}

	void skipComments(Lexer &Lex, Token &Tok) {
	while (Tok.is(tok::comment))
	if (Lex.LexFromRawLexer(Tok))
	return;
	}

	// Returns the offset after header guard directives and any comments
	// before/after header guards (e.g. #ifndef/#define pair, #pragma once). If no
	// header guard is present in the code, this will return the offset after
	// skipping all comments from the start of the code.
	unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName,
	StringRef Code,
	const IncludeStyle &Style) {
	// \p Consume returns location after header guard or 0 if no header guard is
	// found.
	auto ConsumeHeaderGuardAndComment =
	[&](std::function<unsigned(const SourceManager &SM, Lexer &Lex,
	Token Tok)>
	Consume) {
	return getOffsetAfterTokenSequence(
	FileName, Code, Style,
	[&Consume](const SourceManager &SM, Lexer &Lex, Token Tok) {
	skipComments(Lex, Tok);
	unsigned InitialOffset = SM.getFileOffset(Tok.getLocation());
	return std::max(InitialOffset, Consume(SM, Lex, Tok));
	});
	};
	return std::max(
	// #ifndef/#define
	ConsumeHeaderGuardAndComment(
	[](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
	if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) {
	skipComments(Lex, Tok);
	if (checkAndConsumeDirectiveWithName(Lex, "define", Tok) &&
	Tok.isAtStartOfLine())
	return SM.getFileOffset(Tok.getLocation());
	}
	return 0;
	}),
	// #pragma once
	ConsumeHeaderGuardAndComment(
	[](const SourceManager &SM, Lexer &Lex, Token Tok) -> unsigned {
	if (checkAndConsumeDirectiveWithName(Lex, "pragma", Tok,
	StringRef("once")))
	return SM.getFileOffset(Tok.getLocation());
	return 0;
	}));
	}

	// Check if a sequence of tokens is like
	// "#include ("header.h" \| <header.h>)".
	// If it is, \p Tok will be the token after this directive; otherwise, it can be
	// any token after the given \p Tok (including \p Tok).
	bool checkAndConsumeInclusiveDirective(Lexer &Lex, Token &Tok) {
	auto Matched = [&]() {
	Lex.LexFromRawLexer(Tok);
	return true;
	};
	if (Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) &&
	Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == "include") {
	if (Lex.LexFromRawLexer(Tok))
	return false;
	if (Tok.is(tok::string_literal))
	return Matched();
	if (Tok.is(tok::less)) {
	while (!Lex.LexFromRawLexer(Tok) && Tok.isNot(tok::greater)) {
	}
	if (Tok.is(tok::greater))
	return Matched();
	}
	}
	return false;
	}

	// Returns the offset of the last #include directive after which a new
	// #include can be inserted. This ignores #include's after the #include block(s)
	// in the beginning of a file to avoid inserting headers into code sections
	// where new #include's should not be added by default.
	// These code sections include:
	// - raw string literals (containing #include).
	// - #if blocks.
	// - Special #include's among declarations (e.g. functions).
	//
	// If no #include after which a new #include can be inserted, this returns the
	// offset after skipping all comments from the start of the code.
	// Inserting after an #include is not allowed if it comes after code that is not
	// #include (e.g. pre-processing directive that is not #include, declarations).
	unsigned getMaxHeaderInsertionOffset(StringRef FileName, StringRef Code,
	const IncludeStyle &Style) {
	return getOffsetAfterTokenSequence(
	FileName, Code, Style,
	[](const SourceManager &SM, Lexer &Lex, Token Tok) {
	skipComments(Lex, Tok);
	unsigned MaxOffset = SM.getFileOffset(Tok.getLocation());
	while (checkAndConsumeInclusiveDirective(Lex, Tok))
	MaxOffset = SM.getFileOffset(Tok.getLocation());
	return MaxOffset;
	});
	}

	inline StringRef trimInclude(StringRef IncludeName) {
	return IncludeName.trim("\"<>");
	}

	const char IncludeRegexPattern[] =
	R"(^[\t\ ]#[\t\ ](import\|include)[^"<](["<][^">][">]))";

	// The filename of Path excluding extension.
	// Used to match implementation with headers, this differs from sys::path::stem:
	// - in names with multiple dots (foo.cu.cc) it terminates at the first
	// - an empty stem is never returned: /foo/.bar.x => .bar
	// - we don't bother to handle . and .. specially
	StringRef matchingStem(llvm::StringRef Path) {
	StringRef Name = llvm::sys::path::filename(Path);
	return Name.substr(0, Name.find('.', 1));
	}

	} // anonymous namespace

	IncludeCategoryManager::IncludeCategoryManager(const IncludeStyle &Style,
	StringRef FileName)
	: Style(Style), FileName(FileName) {
	for (const auto &Category : Style.IncludeCategories) {
	CategoryRegexs.emplace_back(Category.Regex, Category.RegexIsCaseSensitive
	? llvm::Regex::NoFlags
	: llvm::Regex::IgnoreCase);
	}
	IsMainFile = FileName.ends_with(".c") \|\| FileName.ends_with(".cc") \|\|
	FileName.ends_with(".cpp") \|\| FileName.ends_with(".c++") \|\|
	FileName.ends_with(".cxx") \|\| FileName.ends_with(".m") \|\|
	FileName.ends_with(".mm");
	if (!Style.IncludeIsMainSourceRegex.empty()) {
	llvm::Regex MainFileRegex(Style.IncludeIsMainSourceRegex);
	IsMainFile \|= MainFileRegex.match(FileName);
	}
	}

	int IncludeCategoryManager::getIncludePriority(StringRef IncludeName,
	bool CheckMainHeader) const {
	int Ret = INT_MAX;
	for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i)
	if (CategoryRegexs[i].match(IncludeName)) {
	Ret = Style.IncludeCategories[i].Priority;
	break;
	}
	if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName))
	Ret = 0;
	return Ret;
	}

	int IncludeCategoryManager::getSortIncludePriority(StringRef IncludeName,
	bool CheckMainHeader) const {
	int Ret = INT_MAX;
	for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i)
	if (CategoryRegexs[i].match(IncludeName)) {
	Ret = Style.IncludeCategories[i].SortPriority;
	if (Ret == 0)
	Ret = Style.IncludeCategories[i].Priority;
	break;
	}
	if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName))
	Ret = 0;
	return Ret;
	}
	bool IncludeCategoryManager::isMainHeader(StringRef IncludeName) const {
	switch (Style.MainIncludeChar) {
	case IncludeStyle::MICD_Quote:
	if (!IncludeName.starts_with("\""))
	return false;
	break;
	case IncludeStyle::MICD_AngleBracket:
	if (!IncludeName.starts_with("<"))
	return false;
	break;
	case IncludeStyle::MICD_Any:
	break;
	}

	IncludeName =
	IncludeName.drop_front(1).drop_back(1); // remove the surrounding "" or <>
	// Not matchingStem: implementation files may have compound extensions but
	// headers may not.
	StringRef HeaderStem = llvm::sys::path::stem(IncludeName);
	StringRef FileStem = llvm::sys::path::stem(FileName); // foo.cu for foo.cu.cc
	StringRef MatchingFileStem = matchingStem(FileName); // foo for foo.cu.cc
	// main-header examples:
	// 1) foo.h => foo.cc
	// 2) foo.h => foo.cu.cc
	// 3) foo.proto.h => foo.proto.cc
	//
	// non-main-header examples:
	// 1) foo.h => bar.cc
	// 2) foo.proto.h => foo.cc
	StringRef Matching;
	if (MatchingFileStem.starts_with_insensitive(HeaderStem))
	Matching = MatchingFileStem; // example 1), 2)
	else if (FileStem.equals_insensitive(HeaderStem))
	Matching = FileStem; // example 3)
	if (!Matching.empty()) {
	llvm::Regex MainIncludeRegex(HeaderStem.str() + Style.IncludeIsMainRegex,
	llvm::Regex::IgnoreCase);
	if (MainIncludeRegex.match(Matching))
	return true;
	}
	return false;
	}

	const llvm::Regex HeaderIncludes::IncludeRegex(IncludeRegexPattern);

	HeaderIncludes::HeaderIncludes(StringRef FileName, StringRef Code,
	const IncludeStyle &Style)
	: FileName(FileName), Code(Code), FirstIncludeOffset(-1),
	MinInsertOffset(
	getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style)),
	MaxInsertOffset(MinInsertOffset +
	getMaxHeaderInsertionOffset(
	FileName, Code.drop_front(MinInsertOffset), Style)),
	MainIncludeFound(false),
	Categories(Style, FileName) {
	// Add 0 for main header and INT_MAX for headers that are not in any
	// category.
	Priorities = {0, INT_MAX};
	for (const auto &Category : Style.IncludeCategories)
	Priorities.insert(Category.Priority);
	SmallVector<StringRef, 32> Lines;
	Code.drop_front(MinInsertOffset).split(Lines, "\n");

	unsigned Offset = MinInsertOffset;
	unsigned NextLineOffset;
	SmallVector<StringRef, 4> Matches;
	for (auto Line : Lines) {
	NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1);
	if (IncludeRegex.match(Line, &Matches)) {
	// If this is the last line without trailing newline, we need to make
	// sure we don't delete across the file boundary.
	addExistingInclude(
	Include(Matches[2],
	tooling::Range(
	Offset, std::min(Line.size() + 1, Code.size() - Offset)),
	Matches[1] == "import" ? tooling::IncludeDirective::Import
	: tooling::IncludeDirective::Include),
	NextLineOffset);
	}
	Offset = NextLineOffset;
	}

	// Populate CategoryEndOfssets:
	// - Ensure that CategoryEndOffset[Highest] is always populated.
	// - If CategoryEndOffset[Priority] isn't set, use the next higher value
	// that is set, up to CategoryEndOffset[Highest].
	auto Highest = Priorities.begin();
	auto [It, Inserted] = CategoryEndOffsets.try_emplace(*Highest);
	if (Inserted)
	It->second = FirstIncludeOffset >= 0 ? FirstIncludeOffset : MinInsertOffset;
	// By this point, CategoryEndOffset[Highest] is always set appropriately:
	// - to an appropriate location before/after existing #includes, or
	// - to right after the header guard, or
	// - to the beginning of the file.
	for (auto I = ++Priorities.begin(), E = Priorities.end(); I != E; ++I)
	if (CategoryEndOffsets.find(*I) == CategoryEndOffsets.end())
	CategoryEndOffsets[I] = CategoryEndOffsets[std::prev(I)];
	}

	// \p Offset: the start of the line following this include directive.
	void HeaderIncludes::addExistingInclude(Include IncludeToAdd,
	unsigned NextLineOffset) {
	auto &Incs = ExistingIncludes[trimInclude(IncludeToAdd.Name)];
	Incs.push_back(std::move(IncludeToAdd));
	auto &CurInclude = Incs.back();
	// The header name with quotes or angle brackets.
	// Only record the offset of current #include if we can insert after it.
	if (CurInclude.R.getOffset() <= MaxInsertOffset) {
	int Priority = Categories.getIncludePriority(
	CurInclude.Name, /CheckMainHeader=/!MainIncludeFound);
	if (Priority == 0)
	MainIncludeFound = true;
	CategoryEndOffsets[Priority] = NextLineOffset;
	IncludesByPriority[Priority].push_back(&CurInclude);
	if (FirstIncludeOffset < 0)
	FirstIncludeOffset = CurInclude.R.getOffset();
	}
	}

	std::optional<tooling::Replacement>
	HeaderIncludes::insert(llvm::StringRef IncludeName, bool IsAngled,
	IncludeDirective Directive) const {
	assert(IncludeName == trimInclude(IncludeName));
	// If a <header> ("header") already exists in code, "header" (<header>) with
	// different quotation and/or directive will still be inserted.
	// FIXME: figure out if this is the best behavior.
	auto It = ExistingIncludes.find(IncludeName);
	if (It != ExistingIncludes.end()) {
	for (const auto &Inc : It->second)
	if (Inc.Directive == Directive &&
	((IsAngled && StringRef(Inc.Name).starts_with("<")) \|\|
	(!IsAngled && StringRef(Inc.Name).starts_with("\""))))
	return std::nullopt;
	}
	std::string Quoted =
	std::string(llvm::formatv(IsAngled ? "<{0}>" : "\"{0}\"", IncludeName));
	StringRef QuotedName = Quoted;
	int Priority = Categories.getIncludePriority(
	QuotedName, /CheckMainHeader=/!MainIncludeFound);
	auto CatOffset = CategoryEndOffsets.find(Priority);
	assert(CatOffset != CategoryEndOffsets.end());
	unsigned InsertOffset = CatOffset->second; // Fall back offset
	auto Iter = IncludesByPriority.find(Priority);
	if (Iter != IncludesByPriority.end()) {
	for (const auto *Inc : Iter->second) {
	if (QuotedName < Inc->Name) {
	InsertOffset = Inc->R.getOffset();
	break;
	}
	}
	}
	assert(InsertOffset <= Code.size());
	llvm::StringRef DirectiveSpelling =
	Directive == IncludeDirective::Include ? "include" : "import";
	std::string NewInclude =
	llvm::formatv("#{0} {1}\n", DirectiveSpelling, QuotedName);
	// When inserting headers at end of the code, also append '\n' to the code
	// if it does not end with '\n'.
	// FIXME: when inserting multiple #includes at the end of code, only one
	// newline should be added.
	if (InsertOffset == Code.size() && (!Code.empty() && Code.back() != '\n'))
	NewInclude = "\n" + NewInclude;
	return tooling::Replacement(FileName, InsertOffset, 0, NewInclude);
	}

	tooling::Replacements HeaderIncludes::remove(llvm::StringRef IncludeName,
	bool IsAngled) const {
	assert(IncludeName == trimInclude(IncludeName));
	tooling::Replacements Result;
	auto Iter = ExistingIncludes.find(IncludeName);
	if (Iter == ExistingIncludes.end())
	return Result;
	for (const auto &Inc : Iter->second) {
	if ((IsAngled && StringRef(Inc.Name).starts_with("\"")) \|\|
	(!IsAngled && StringRef(Inc.Name).starts_with("<")))
	continue;
	llvm::Error Err = Result.add(tooling::Replacement(
	FileName, Inc.R.getOffset(), Inc.R.getLength(), ""));
	if (Err) {
	auto ErrMsg = "Unexpected conflicts in #include deletions: " +
	llvm::toString(std::move(Err));
	llvm_unreachable(ErrMsg.c_str());
	}
	}
	return Result;
	}

	} // namespace tooling
	} // namespace clang