|  | //===--- Macros.h - Format C++ code -----------------------------*- C++ -*-===// | 
|  | // | 
|  | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | // See https://llvm.org/LICENSE.txt for license information. | 
|  | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | /// | 
|  | /// \file | 
|  | /// This file contains the main building blocks of macro support in | 
|  | /// clang-format. | 
|  | /// | 
|  | /// In order to not violate the requirement that clang-format can format files | 
|  | /// in isolation, clang-format's macro support uses expansions users provide | 
|  | /// as part of clang-format's style configuration. | 
|  | /// | 
|  | /// Macro definitions are of the form "MACRO(p1, p2)=p1 + p2", but only support | 
|  | /// one level of expansion (\see MacroExpander for a full description of what | 
|  | /// is supported). | 
|  | /// | 
|  | /// As part of parsing, clang-format uses the MacroExpander to expand the | 
|  | /// spelled token streams into expanded token streams when it encounters a | 
|  | /// macro call. The UnwrappedLineParser continues to parse UnwrappedLines | 
|  | /// from the expanded token stream. | 
|  | /// After the expanded unwrapped lines are parsed, the MacroCallReconstructor | 
|  | /// matches the spelled token stream into unwrapped lines that best resemble the | 
|  | /// structure of the expanded unwrapped lines. These reconstructed unwrapped | 
|  | /// lines are aliasing the tokens in the expanded token stream, so that token | 
|  | /// annotations will be reused when formatting the spelled macro calls. | 
|  | /// | 
|  | /// When formatting, clang-format annotates and formats the expanded unwrapped | 
|  | /// lines first, determining the token types. Next, it formats the spelled | 
|  | /// unwrapped lines, keeping the token types fixed, while allowing other | 
|  | /// formatting decisions to change. | 
|  | /// | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #ifndef CLANG_LIB_FORMAT_MACROS_H | 
|  | #define CLANG_LIB_FORMAT_MACROS_H | 
|  |  | 
|  | #include <list> | 
|  | #include <map> | 
|  | #include <string> | 
|  | #include <vector> | 
|  |  | 
|  | #include "FormatToken.h" | 
|  | #include "llvm/ADT/ArrayRef.h" | 
|  | #include "llvm/ADT/DenseMap.h" | 
|  | #include "llvm/ADT/SmallVector.h" | 
|  | #include "llvm/ADT/StringRef.h" | 
|  |  | 
|  | namespace clang { | 
|  | namespace format { | 
|  |  | 
|  | struct UnwrappedLine; | 
|  | struct UnwrappedLineNode; | 
|  |  | 
|  | /// Takes a set of macro definitions as strings and allows expanding calls to | 
|  | /// those macros. | 
|  | /// | 
|  | /// For example: | 
|  | /// Definition: A(x, y)=x + y | 
|  | /// Call      : A(int a = 1, 2) | 
|  | /// Expansion : int a = 1 + 2 | 
|  | /// | 
|  | /// Expansion does not check arity of the definition. | 
|  | /// If fewer arguments than expected are provided, the remaining parameters | 
|  | /// are considered empty: | 
|  | /// Call     : A(a) | 
|  | /// Expansion: a + | 
|  | /// If more arguments than expected are provided, they will be discarded. | 
|  | /// | 
|  | /// The expander does not support: | 
|  | /// - recursive expansion | 
|  | /// - stringification | 
|  | /// - concatenation | 
|  | /// - variadic macros | 
|  | /// | 
|  | /// Furthermore, only a single expansion of each macro argument is supported, | 
|  | /// so that we cannot get conflicting formatting decisions from different | 
|  | /// expansions. | 
|  | /// Definition: A(x)=x+x | 
|  | /// Call      : A(id) | 
|  | /// Expansion : id+x | 
|  | /// | 
|  | class MacroExpander { | 
|  | public: | 
|  | using ArgsList = llvm::ArrayRef<llvm::SmallVector<FormatToken *, 8>>; | 
|  |  | 
|  | /// Construct a macro expander from a set of macro definitions. | 
|  | /// Macro definitions must be encoded as UTF-8. | 
|  | /// | 
|  | /// Each entry in \p Macros must conform to the following simple | 
|  | /// macro-definition language: | 
|  | /// <definition> ::= <id> <expansion> | <id> "(" <params> ")" <expansion> | 
|  | /// <params>     ::= <id-list> | "" | 
|  | /// <id-list>    ::= <id> | <id> "," <params> | 
|  | /// <expansion>  ::= "=" <tail> | <eof> | 
|  | /// <tail>       ::= <tok> <tail> | <eof> | 
|  | /// | 
|  | /// Macros that cannot be parsed will be silently discarded. | 
|  | /// | 
|  | MacroExpander(const std::vector<std::string> &Macros, | 
|  | clang::SourceManager &SourceMgr, const FormatStyle &Style, | 
|  | llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator, | 
|  | IdentifierTable &IdentTable); | 
|  | ~MacroExpander(); | 
|  |  | 
|  | /// Returns whether any macro \p Name is defined, regardless of overloads. | 
|  | bool defined(llvm::StringRef Name) const; | 
|  |  | 
|  | /// Returns whether there is an object-like overload, i.e. where the macro | 
|  | /// has no arguments and should not consume subsequent parentheses. | 
|  | bool objectLike(llvm::StringRef Name) const; | 
|  |  | 
|  | /// Returns whether macro \p Name provides an overload with the given arity. | 
|  | bool hasArity(llvm::StringRef Name, unsigned Arity) const; | 
|  |  | 
|  | /// Returns the expanded stream of format tokens for \p ID, where each | 
|  | /// element in \p OptionalArgs is a positional argument to the macro call. | 
|  | /// If \p OptionalArgs is not set, the object-like overload is used. | 
|  | /// If \p OptionalArgs is set, the overload whose arity equals | 
|  | /// \c OptionalArgs->size() is used. | 
|  | llvm::SmallVector<FormatToken *, 8> | 
|  | expand(FormatToken *ID, std::optional<ArgsList> OptionalArgs) const; | 
|  |  | 
|  | private: | 
|  | struct Definition; | 
|  | class DefinitionParser; | 
|  |  | 
|  | void parseDefinition(const std::string &Macro); | 
|  |  | 
|  | clang::SourceManager &SourceMgr; | 
|  | const FormatStyle &Style; | 
|  | llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator; | 
|  | IdentifierTable &IdentTable; | 
|  | SmallVector<std::unique_ptr<llvm::MemoryBuffer>> Buffers; | 
|  | llvm::StringMap<llvm::DenseMap<int, Definition>> FunctionLike; | 
|  | llvm::StringMap<Definition> ObjectLike; | 
|  | }; | 
|  |  | 
|  | /// Converts a sequence of UnwrappedLines containing expanded macros into a | 
|  | /// single UnwrappedLine containing the macro calls.  This UnwrappedLine may be | 
|  | /// broken into child lines, in a way that best conveys the structure of the | 
|  | /// expanded code. | 
|  | /// | 
|  | /// In the simplest case, a spelled UnwrappedLine contains one macro, and after | 
|  | /// expanding it we have one expanded UnwrappedLine.  In general, macro | 
|  | /// expansions can span UnwrappedLines, and multiple macros can contribute | 
|  | /// tokens to the same line.  We keep consuming expanded lines until: | 
|  | /// *   all expansions that started have finished (we're not chopping any macros | 
|  | ///     in half) | 
|  | /// *   *and* we've reached the end of a *spelled* unwrapped line. | 
|  | /// | 
|  | /// A single UnwrappedLine represents this chunk of code. | 
|  | /// | 
|  | /// After this point, the state of the spelled/expanded stream is "in sync" | 
|  | /// (both at the start of an UnwrappedLine, with no macros open), so the | 
|  | /// Reconstructor can be thrown away and parsing can continue. | 
|  | /// | 
|  | /// Given a mapping from the macro name identifier token in the macro call | 
|  | /// to the tokens of the macro call, for example: | 
|  | /// CLASSA -> CLASSA({public: void x();}) | 
|  | /// | 
|  | /// When getting the formatted lines of the expansion via the \c addLine method | 
|  | /// (each '->' specifies a call to \c addLine ): | 
|  | /// -> class A { | 
|  | /// -> public: | 
|  | /// ->   void x(); | 
|  | /// -> }; | 
|  | /// | 
|  | /// Creates the tree of unwrapped lines containing the macro call tokens so that | 
|  | /// the macro call tokens fit the semantic structure of the expanded formatted | 
|  | /// lines: | 
|  | /// -> CLASSA({ | 
|  | /// -> public: | 
|  | /// ->   void x(); | 
|  | /// -> }) | 
|  | class MacroCallReconstructor { | 
|  | public: | 
|  | /// Create a Reconstructor whose resulting \c UnwrappedLine will start at | 
|  | /// \p Level, using \p ActiveExpansions, the map from name identifier token to | 
|  | /// the corresponding tokens of the spelled macro call. | 
|  | MacroCallReconstructor( | 
|  | unsigned Level, | 
|  | const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>> | 
|  | &ActiveExpansions); | 
|  |  | 
|  | /// For the given \p Line, match all occurrences of tokens expanded from a | 
|  | /// macro to unwrapped lines in the spelled macro call so that the resulting | 
|  | /// tree of unwrapped lines best resembles the structure of unwrapped lines | 
|  | /// passed in via \c addLine. | 
|  | void addLine(const UnwrappedLine &Line); | 
|  |  | 
|  | /// Check whether at the current state there is no open macro expansion | 
|  | /// that needs to be processed to finish a macro call. | 
|  | /// Only when \c finished() is true, \c takeResult() can be called to retrieve | 
|  | /// the resulting \c UnwrappedLine. | 
|  | /// If there are multiple subsequent macro calls within an unwrapped line in | 
|  | /// the spelled token stream, the calling code may also continue to call | 
|  | /// \c addLine() when \c finished() is true. | 
|  | bool finished() const { return ActiveExpansions.empty(); } | 
|  |  | 
|  | /// Retrieve the formatted \c UnwrappedLine containing the original | 
|  | /// macro calls, formatted according to the expanded token stream received | 
|  | /// via \c addLine(). | 
|  | /// Generally, this line tries to have the same structure as the expanded, | 
|  | /// formatted unwrapped lines handed in via \c addLine(), with the exception | 
|  | /// that for multiple top-level lines, each subsequent line will be the | 
|  | /// child of the last token in its predecessor. This representation is chosen | 
|  | /// because it is a precondition to the formatter that we get what looks like | 
|  | /// a single statement in a single \c UnwrappedLine (i.e. matching parens). | 
|  | /// | 
|  | /// If a token in a macro argument is a child of a token in the expansion, | 
|  | /// the parent will be the corresponding token in the macro call. | 
|  | /// For example: | 
|  | ///   #define C(a, b) class C { a b | 
|  | ///   C(int x;, int y;) | 
|  | /// would expand to | 
|  | ///   class C { int x; int y; | 
|  | /// where in a formatted line "int x;" and "int y;" would both be new separate | 
|  | /// lines. | 
|  | /// | 
|  | /// In the result, "int x;" will be a child of the opening parenthesis in "C(" | 
|  | /// and "int y;" will be a child of the "," token: | 
|  | ///   C ( | 
|  | ///     \- int x; | 
|  | ///     , | 
|  | ///     \- int y; | 
|  | ///     ) | 
|  | UnwrappedLine takeResult() &&; | 
|  |  | 
|  | private: | 
|  | void add(FormatToken *Token, FormatToken *ExpandedParent, bool First); | 
|  | void prepareParent(FormatToken *ExpandedParent, bool First); | 
|  | FormatToken *getParentInResult(FormatToken *Parent); | 
|  | void reconstruct(FormatToken *Token); | 
|  | void startReconstruction(FormatToken *Token); | 
|  | bool reconstructActiveCallUntil(FormatToken *Token); | 
|  | void endReconstruction(FormatToken *Token); | 
|  | bool processNextReconstructed(); | 
|  | void finalize(); | 
|  |  | 
|  | struct ReconstructedLine; | 
|  |  | 
|  | void appendToken(FormatToken *Token, ReconstructedLine *L = nullptr); | 
|  | UnwrappedLine createUnwrappedLine(const ReconstructedLine &Line, int Level); | 
|  | void debug(const ReconstructedLine &Line, int Level); | 
|  | ReconstructedLine &parentLine(); | 
|  | ReconstructedLine *currentLine(); | 
|  | void debugParentMap() const; | 
|  |  | 
|  | #ifndef NDEBUG | 
|  | enum ReconstructorState { | 
|  | Start,      // No macro expansion was found in the input yet. | 
|  | InProgress, // During a macro reconstruction. | 
|  | Finalized,  // Past macro reconstruction, the result is finalized. | 
|  | }; | 
|  | ReconstructorState State = Start; | 
|  | #endif | 
|  |  | 
|  | // Node in which we build up the resulting unwrapped line; this type is | 
|  | // analogous to UnwrappedLineNode. | 
|  | struct LineNode { | 
|  | LineNode() = default; | 
|  | LineNode(FormatToken *Tok) : Tok(Tok) {} | 
|  | FormatToken *Tok = nullptr; | 
|  | llvm::SmallVector<std::unique_ptr<ReconstructedLine>> Children; | 
|  | }; | 
|  |  | 
|  | // Line in which we build up the resulting unwrapped line. | 
|  | // FIXME: Investigate changing UnwrappedLine to a pointer type and using it | 
|  | // instead of rolling our own type. | 
|  | struct ReconstructedLine { | 
|  | llvm::SmallVector<std::unique_ptr<LineNode>> Tokens; | 
|  | }; | 
|  |  | 
|  | // The line in which we collect the resulting reconstructed output. | 
|  | // To reduce special cases in the algorithm, the first level of the line | 
|  | // contains a single null token that has the reconstructed incoming | 
|  | // lines as children. | 
|  | // In the end, we stitch the lines together so that each subsequent line | 
|  | // is a child of the last token of the previous line. This is necessary | 
|  | // in order to format the overall expression as a single logical line - | 
|  | // if we created separate lines, we'd format them with their own top-level | 
|  | // indent depending on the semantic structure, which is not desired. | 
|  | ReconstructedLine Result; | 
|  |  | 
|  | // Stack of currently "open" lines, where each line's predecessor's last | 
|  | // token is the parent token for that line. | 
|  | llvm::SmallVector<ReconstructedLine *> ActiveReconstructedLines; | 
|  |  | 
|  | // Maps from the expanded token to the token that takes its place in the | 
|  | // reconstructed token stream in terms of parent-child relationships. | 
|  | // Note that it might take multiple steps to arrive at the correct | 
|  | // parent in the output. | 
|  | // Given: #define C(a, b) []() { a; b; } | 
|  | // And a call: C(f(), g()) | 
|  | // The structure in the incoming formatted unwrapped line will be: | 
|  | // []() { | 
|  | //      |- f(); | 
|  | //      \- g(); | 
|  | // } | 
|  | // with f and g being children of the opening brace. | 
|  | // In the reconstructed call: | 
|  | // C(f(), g()) | 
|  | //  \- f() | 
|  | //      \- g() | 
|  | // We want f to be a child of the opening parenthesis and g to be a child | 
|  | // of the comma token in the macro call. | 
|  | // Thus, we map | 
|  | // { -> ( | 
|  | // and add | 
|  | // ( -> , | 
|  | // once we're past the comma in the reconstruction. | 
|  | llvm::DenseMap<FormatToken *, FormatToken *> | 
|  | SpelledParentToReconstructedParent; | 
|  |  | 
|  | // Keeps track of a single expansion while we're reconstructing tokens it | 
|  | // generated. | 
|  | struct Expansion { | 
|  | // The identifier token of the macro call. | 
|  | FormatToken *ID; | 
|  | // Our current position in the reconstruction. | 
|  | std::list<UnwrappedLineNode>::iterator SpelledI; | 
|  | // The end of the reconstructed token sequence. | 
|  | std::list<UnwrappedLineNode>::iterator SpelledE; | 
|  | }; | 
|  |  | 
|  | // Stack of macro calls for which we're in the middle of an expansion. | 
|  | llvm::SmallVector<Expansion> ActiveExpansions; | 
|  |  | 
|  | struct MacroCallState { | 
|  | MacroCallState(ReconstructedLine *Line, FormatToken *ParentLastToken, | 
|  | FormatToken *MacroCallLParen); | 
|  |  | 
|  | ReconstructedLine *Line; | 
|  |  | 
|  | // The last token in the parent line or expansion, or nullptr if the macro | 
|  | // expansion is on a top-level line. | 
|  | // | 
|  | // For example, in the macro call: | 
|  | //   auto f = []() { ID(1); }; | 
|  | // The MacroCallState for ID will have '{' as ParentLastToken. | 
|  | // | 
|  | // In the macro call: | 
|  | //   ID(ID(void f())); | 
|  | // The MacroCallState of the outer ID will have nullptr as ParentLastToken, | 
|  | // while the MacroCallState for the inner ID will have the '(' of the outer | 
|  | // ID as ParentLastToken. | 
|  | // | 
|  | // In the macro call: | 
|  | //   ID2(a, ID(b)); | 
|  | // The MacroCallState of ID will have ',' as ParentLastToken. | 
|  | FormatToken *ParentLastToken; | 
|  |  | 
|  | // The l_paren of this MacroCallState's macro call. | 
|  | FormatToken *MacroCallLParen; | 
|  | }; | 
|  |  | 
|  | // Keeps track of the lines into which the opening brace/parenthesis & | 
|  | // argument separating commas for each level in the macro call go in order to | 
|  | // put the corresponding closing brace/parenthesis into the same line in the | 
|  | // output and keep track of which parents in the expanded token stream map to | 
|  | // which tokens in the reconstructed stream. | 
|  | // When an opening brace/parenthesis has children, we want the structure of | 
|  | // the output line to be: | 
|  | // |- MACRO | 
|  | // |- ( | 
|  | // |  \- <argument> | 
|  | // |- , | 
|  | // |  \- <argument> | 
|  | // \- ) | 
|  | llvm::SmallVector<MacroCallState> MacroCallStructure; | 
|  |  | 
|  | // Level the generated UnwrappedLine will be at. | 
|  | const unsigned Level; | 
|  |  | 
|  | // Maps from identifier of the macro call to an unwrapped line containing | 
|  | // all tokens of the macro call. | 
|  | const llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>> | 
|  | &IdToReconstructed; | 
|  | }; | 
|  |  | 
|  | } // namespace format | 
|  | } // namespace clang | 
|  |  | 
|  | #endif |