| //===--- CommentParser.cpp - Doxygen comment parser -----------------------===// | 
 | // | 
 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
 | // See https://llvm.org/LICENSE.txt for license information. | 
 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
 | // | 
 | //===----------------------------------------------------------------------===// | 
 |  | 
 | #include "clang/AST/CommentParser.h" | 
 | #include "clang/AST/CommentCommandTraits.h" | 
 | #include "clang/AST/CommentSema.h" | 
 | #include "clang/Basic/CharInfo.h" | 
 | #include "clang/Basic/DiagnosticComment.h" | 
 | #include "clang/Basic/SourceManager.h" | 
 | #include "llvm/Support/ErrorHandling.h" | 
 |  | 
 | namespace clang { | 
 |  | 
 | static inline bool isWhitespace(llvm::StringRef S) { | 
 |   for (StringRef::const_iterator I = S.begin(), E = S.end(); I != E; ++I) { | 
 |     if (!isWhitespace(*I)) | 
 |       return false; | 
 |   } | 
 |   return true; | 
 | } | 
 |  | 
 | namespace comments { | 
 |  | 
 | /// Re-lexes a sequence of tok::text tokens. | 
 | class TextTokenRetokenizer { | 
 |   llvm::BumpPtrAllocator &Allocator; | 
 |   Parser &P; | 
 |  | 
 |   /// This flag is set when there are no more tokens we can fetch from lexer. | 
 |   bool NoMoreInterestingTokens; | 
 |  | 
 |   /// Token buffer: tokens we have processed and lookahead. | 
 |   SmallVector<Token, 16> Toks; | 
 |  | 
 |   /// A position in \c Toks. | 
 |   struct Position { | 
 |     const char *BufferStart; | 
 |     const char *BufferEnd; | 
 |     const char *BufferPtr; | 
 |     SourceLocation BufferStartLoc; | 
 |     unsigned CurToken; | 
 |   }; | 
 |  | 
 |   /// Current position in Toks. | 
 |   Position Pos; | 
 |  | 
 |   bool isEnd() const { | 
 |     return Pos.CurToken >= Toks.size(); | 
 |   } | 
 |  | 
 |   /// Sets up the buffer pointers to point to current token. | 
 |   void setupBuffer() { | 
 |     assert(!isEnd()); | 
 |     const Token &Tok = Toks[Pos.CurToken]; | 
 |  | 
 |     Pos.BufferStart = Tok.getText().begin(); | 
 |     Pos.BufferEnd = Tok.getText().end(); | 
 |     Pos.BufferPtr = Pos.BufferStart; | 
 |     Pos.BufferStartLoc = Tok.getLocation(); | 
 |   } | 
 |  | 
 |   SourceLocation getSourceLocation() const { | 
 |     const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart; | 
 |     return Pos.BufferStartLoc.getLocWithOffset(CharNo); | 
 |   } | 
 |  | 
 |   char peek() const { | 
 |     assert(!isEnd()); | 
 |     assert(Pos.BufferPtr != Pos.BufferEnd); | 
 |     return *Pos.BufferPtr; | 
 |   } | 
 |  | 
 |   void consumeChar() { | 
 |     assert(!isEnd()); | 
 |     assert(Pos.BufferPtr != Pos.BufferEnd); | 
 |     Pos.BufferPtr++; | 
 |     if (Pos.BufferPtr == Pos.BufferEnd) { | 
 |       Pos.CurToken++; | 
 |       if (isEnd() && !addToken()) | 
 |         return; | 
 |  | 
 |       assert(!isEnd()); | 
 |       setupBuffer(); | 
 |     } | 
 |   } | 
 |  | 
 |   /// Extract a template type | 
 |   bool lexTemplate(SmallString<32> &WordText) { | 
 |     unsigned BracketCount = 0; | 
 |     while (!isEnd()) { | 
 |       const char C = peek(); | 
 |       WordText.push_back(C); | 
 |       consumeChar(); | 
 |       switch (C) { | 
 |       case '<': { | 
 |         BracketCount++; | 
 |         break; | 
 |       } | 
 |       case '>': { | 
 |         BracketCount--; | 
 |         if (!BracketCount) | 
 |           return true; | 
 |         break; | 
 |       } | 
 |       default: | 
 |         break; | 
 |       } | 
 |     } | 
 |     return false; | 
 |   } | 
 |  | 
 |   /// Add a token. | 
 |   /// Returns true on success, false if there are no interesting tokens to | 
 |   /// fetch from lexer. | 
 |   bool addToken() { | 
 |     if (NoMoreInterestingTokens) | 
 |       return false; | 
 |  | 
 |     if (P.Tok.is(tok::newline)) { | 
 |       // If we see a single newline token between text tokens, skip it. | 
 |       Token Newline = P.Tok; | 
 |       P.consumeToken(); | 
 |       if (P.Tok.isNot(tok::text)) { | 
 |         P.putBack(Newline); | 
 |         NoMoreInterestingTokens = true; | 
 |         return false; | 
 |       } | 
 |     } | 
 |     if (P.Tok.isNot(tok::text)) { | 
 |       NoMoreInterestingTokens = true; | 
 |       return false; | 
 |     } | 
 |  | 
 |     Toks.push_back(P.Tok); | 
 |     P.consumeToken(); | 
 |     if (Toks.size() == 1) | 
 |       setupBuffer(); | 
 |     return true; | 
 |   } | 
 |  | 
 |   void consumeWhitespace() { | 
 |     while (!isEnd()) { | 
 |       if (isWhitespace(peek())) | 
 |         consumeChar(); | 
 |       else | 
 |         break; | 
 |     } | 
 |   } | 
 |  | 
 |   void formTokenWithChars(Token &Result, | 
 |                           SourceLocation Loc, | 
 |                           const char *TokBegin, | 
 |                           unsigned TokLength, | 
 |                           StringRef Text) { | 
 |     Result.setLocation(Loc); | 
 |     Result.setKind(tok::text); | 
 |     Result.setLength(TokLength); | 
 | #ifndef NDEBUG | 
 |     Result.TextPtr = "<UNSET>"; | 
 |     Result.IntVal = 7; | 
 | #endif | 
 |     Result.setText(Text); | 
 |   } | 
 |  | 
 | public: | 
 |   TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P): | 
 |       Allocator(Allocator), P(P), NoMoreInterestingTokens(false) { | 
 |     Pos.CurToken = 0; | 
 |     addToken(); | 
 |   } | 
 |  | 
 |   /// Extract a type argument | 
 |   bool lexType(Token &Tok) { | 
 |     if (isEnd()) | 
 |       return false; | 
 |  | 
 |     // Save current position in case we need to rollback because the type is | 
 |     // empty. | 
 |     Position SavedPos = Pos; | 
 |  | 
 |     // Consume any leading whitespace. | 
 |     consumeWhitespace(); | 
 |     SmallString<32> WordText; | 
 |     const char *WordBegin = Pos.BufferPtr; | 
 |     SourceLocation Loc = getSourceLocation(); | 
 |  | 
 |     while (!isEnd()) { | 
 |       const char C = peek(); | 
 |       // For non-whitespace characters we check if it's a template or otherwise | 
 |       // continue reading the text into a word. | 
 |       if (!isWhitespace(C)) { | 
 |         if (C == '<') { | 
 |           if (!lexTemplate(WordText)) | 
 |             return false; | 
 |         } else { | 
 |           WordText.push_back(C); | 
 |           consumeChar(); | 
 |         } | 
 |       } else { | 
 |         consumeChar(); | 
 |         break; | 
 |       } | 
 |     } | 
 |  | 
 |     const unsigned Length = WordText.size(); | 
 |     if (Length == 0) { | 
 |       Pos = SavedPos; | 
 |       return false; | 
 |     } | 
 |  | 
 |     char *TextPtr = Allocator.Allocate<char>(Length + 1); | 
 |  | 
 |     memcpy(TextPtr, WordText.c_str(), Length + 1); | 
 |     StringRef Text = StringRef(TextPtr, Length); | 
 |  | 
 |     formTokenWithChars(Tok, Loc, WordBegin, Length, Text); | 
 |     return true; | 
 |   } | 
 |  | 
 |   // Check if this line starts with @par or \par | 
 |   bool startsWithParCommand() { | 
 |     unsigned Offset = 1; | 
 |  | 
 |     // Skip all whitespace characters at the beginning. | 
 |     // This needs to backtrack because Pos has already advanced past the | 
 |     // actual \par or @par command by the time this function is called. | 
 |     while (isWhitespace(*(Pos.BufferPtr - Offset))) | 
 |       Offset++; | 
 |  | 
 |     // Once we've reached the whitespace, backtrack and check if the previous | 
 |     // four characters are \par or @par. | 
 |     llvm::StringRef LineStart(Pos.BufferPtr - Offset - 3, 4); | 
 |     return LineStart.starts_with("\\par") || LineStart.starts_with("@par"); | 
 |   } | 
 |  | 
 |   /// Extract a par command argument-header. | 
 |   bool lexParHeading(Token &Tok) { | 
 |     if (isEnd()) | 
 |       return false; | 
 |  | 
 |     Position SavedPos = Pos; | 
 |  | 
 |     consumeWhitespace(); | 
 |     SmallString<32> WordText; | 
 |     const char *WordBegin = Pos.BufferPtr; | 
 |     SourceLocation Loc = getSourceLocation(); | 
 |  | 
 |     if (!startsWithParCommand()) | 
 |       return false; | 
 |  | 
 |     // Read until the end of this token, which is effectively the end of the | 
 |     // line. This gets us the content of the par header, if there is one. | 
 |     while (!isEnd()) { | 
 |       WordText.push_back(peek()); | 
 |       if (Pos.BufferPtr + 1 == Pos.BufferEnd) { | 
 |         consumeChar(); | 
 |         break; | 
 |       } | 
 |       consumeChar(); | 
 |     } | 
 |  | 
 |     unsigned Length = WordText.size(); | 
 |     if (Length == 0) { | 
 |       Pos = SavedPos; | 
 |       return false; | 
 |     } | 
 |  | 
 |     char *TextPtr = Allocator.Allocate<char>(Length + 1); | 
 |  | 
 |     memcpy(TextPtr, WordText.c_str(), Length + 1); | 
 |     StringRef Text = StringRef(TextPtr, Length); | 
 |  | 
 |     formTokenWithChars(Tok, Loc, WordBegin, Length, Text); | 
 |     return true; | 
 |   } | 
 |  | 
 |   /// Extract a word -- sequence of non-whitespace characters. | 
 |   bool lexWord(Token &Tok) { | 
 |     if (isEnd()) | 
 |       return false; | 
 |  | 
 |     Position SavedPos = Pos; | 
 |  | 
 |     consumeWhitespace(); | 
 |     SmallString<32> WordText; | 
 |     const char *WordBegin = Pos.BufferPtr; | 
 |     SourceLocation Loc = getSourceLocation(); | 
 |     while (!isEnd()) { | 
 |       const char C = peek(); | 
 |       if (!isWhitespace(C)) { | 
 |         WordText.push_back(C); | 
 |         consumeChar(); | 
 |       } else | 
 |         break; | 
 |     } | 
 |     const unsigned Length = WordText.size(); | 
 |     if (Length == 0) { | 
 |       Pos = SavedPos; | 
 |       return false; | 
 |     } | 
 |  | 
 |     char *TextPtr = Allocator.Allocate<char>(Length + 1); | 
 |  | 
 |     memcpy(TextPtr, WordText.c_str(), Length + 1); | 
 |     StringRef Text = StringRef(TextPtr, Length); | 
 |  | 
 |     formTokenWithChars(Tok, Loc, WordBegin, Length, Text); | 
 |     return true; | 
 |   } | 
 |  | 
 |   bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) { | 
 |     if (isEnd()) | 
 |       return false; | 
 |  | 
 |     Position SavedPos = Pos; | 
 |  | 
 |     consumeWhitespace(); | 
 |     SmallString<32> WordText; | 
 |     const char *WordBegin = Pos.BufferPtr; | 
 |     SourceLocation Loc = getSourceLocation(); | 
 |     bool Error = false; | 
 |     if (!isEnd()) { | 
 |       const char C = peek(); | 
 |       if (C == OpenDelim) { | 
 |         WordText.push_back(C); | 
 |         consumeChar(); | 
 |       } else | 
 |         Error = true; | 
 |     } | 
 |     char C = '\0'; | 
 |     while (!Error && !isEnd()) { | 
 |       C = peek(); | 
 |       WordText.push_back(C); | 
 |       consumeChar(); | 
 |       if (C == CloseDelim) | 
 |         break; | 
 |     } | 
 |     if (!Error && C != CloseDelim) | 
 |       Error = true; | 
 |  | 
 |     if (Error) { | 
 |       Pos = SavedPos; | 
 |       return false; | 
 |     } | 
 |  | 
 |     const unsigned Length = WordText.size(); | 
 |     char *TextPtr = Allocator.Allocate<char>(Length + 1); | 
 |  | 
 |     memcpy(TextPtr, WordText.c_str(), Length + 1); | 
 |     StringRef Text = StringRef(TextPtr, Length); | 
 |  | 
 |     formTokenWithChars(Tok, Loc, WordBegin, | 
 |                        Pos.BufferPtr - WordBegin, Text); | 
 |     return true; | 
 |   } | 
 |  | 
 |   /// Put back tokens that we didn't consume. | 
 |   void putBackLeftoverTokens() { | 
 |     if (isEnd()) | 
 |       return; | 
 |  | 
 |     bool HavePartialTok = false; | 
 |     Token PartialTok; | 
 |     if (Pos.BufferPtr != Pos.BufferStart) { | 
 |       formTokenWithChars(PartialTok, getSourceLocation(), | 
 |                          Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr, | 
 |                          StringRef(Pos.BufferPtr, | 
 |                                    Pos.BufferEnd - Pos.BufferPtr)); | 
 |       HavePartialTok = true; | 
 |       Pos.CurToken++; | 
 |     } | 
 |  | 
 |     P.putBack(llvm::ArrayRef(Toks.begin() + Pos.CurToken, Toks.end())); | 
 |     Pos.CurToken = Toks.size(); | 
 |  | 
 |     if (HavePartialTok) | 
 |       P.putBack(PartialTok); | 
 |   } | 
 | }; | 
 |  | 
 | Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator, | 
 |                const SourceManager &SourceMgr, DiagnosticsEngine &Diags, | 
 |                const CommandTraits &Traits): | 
 |     L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags), | 
 |     Traits(Traits) { | 
 |   consumeToken(); | 
 | } | 
 |  | 
 | void Parser::parseParamCommandArgs(ParamCommandComment *PC, | 
 |                                    TextTokenRetokenizer &Retokenizer) { | 
 |   Token Arg; | 
 |   // Check if argument looks like direction specification: [dir] | 
 |   // e.g., [in], [out], [in,out] | 
 |   if (Retokenizer.lexDelimitedSeq(Arg, '[', ']')) | 
 |     S.actOnParamCommandDirectionArg(PC, | 
 |                                     Arg.getLocation(), | 
 |                                     Arg.getEndLocation(), | 
 |                                     Arg.getText()); | 
 |  | 
 |   if (Retokenizer.lexWord(Arg)) | 
 |     S.actOnParamCommandParamNameArg(PC, | 
 |                                     Arg.getLocation(), | 
 |                                     Arg.getEndLocation(), | 
 |                                     Arg.getText()); | 
 | } | 
 |  | 
 | void Parser::parseTParamCommandArgs(TParamCommandComment *TPC, | 
 |                                     TextTokenRetokenizer &Retokenizer) { | 
 |   Token Arg; | 
 |   if (Retokenizer.lexWord(Arg)) | 
 |     S.actOnTParamCommandParamNameArg(TPC, | 
 |                                      Arg.getLocation(), | 
 |                                      Arg.getEndLocation(), | 
 |                                      Arg.getText()); | 
 | } | 
 |  | 
 | ArrayRef<Comment::Argument> | 
 | Parser::parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs) { | 
 |   auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs)) | 
 |       Comment::Argument[NumArgs]; | 
 |   unsigned ParsedArgs = 0; | 
 |   Token Arg; | 
 |   while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) { | 
 |     Args[ParsedArgs] = Comment::Argument{ | 
 |         SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()}; | 
 |     ParsedArgs++; | 
 |   } | 
 |  | 
 |   return llvm::ArrayRef(Args, ParsedArgs); | 
 | } | 
 |  | 
 | ArrayRef<Comment::Argument> | 
 | Parser::parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer, | 
 |                               unsigned NumArgs) { | 
 |   auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs)) | 
 |       Comment::Argument[NumArgs]; | 
 |   unsigned ParsedArgs = 0; | 
 |   Token Arg; | 
 |  | 
 |   while (ParsedArgs < NumArgs && Retokenizer.lexType(Arg)) { | 
 |     Args[ParsedArgs] = Comment::Argument{ | 
 |         SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()}; | 
 |     ParsedArgs++; | 
 |   } | 
 |  | 
 |   return llvm::ArrayRef(Args, ParsedArgs); | 
 | } | 
 |  | 
 | ArrayRef<Comment::Argument> | 
 | Parser::parseParCommandArgs(TextTokenRetokenizer &Retokenizer, | 
 |                             unsigned NumArgs) { | 
 |   assert(NumArgs > 0); | 
 |   auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs)) | 
 |       Comment::Argument[NumArgs]; | 
 |   unsigned ParsedArgs = 0; | 
 |   Token Arg; | 
 |  | 
 |   while (ParsedArgs < NumArgs && Retokenizer.lexParHeading(Arg)) { | 
 |     Args[ParsedArgs] = Comment::Argument{ | 
 |         SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()}; | 
 |     ParsedArgs++; | 
 |   } | 
 |  | 
 |   return llvm::ArrayRef(Args, ParsedArgs); | 
 | } | 
 |  | 
 | BlockCommandComment *Parser::parseBlockCommand() { | 
 |   assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command)); | 
 |  | 
 |   ParamCommandComment *PC = nullptr; | 
 |   TParamCommandComment *TPC = nullptr; | 
 |   BlockCommandComment *BC = nullptr; | 
 |   const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID()); | 
 |   CommandMarkerKind CommandMarker = | 
 |       Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At; | 
 |   if (Info->IsParamCommand) { | 
 |     PC = S.actOnParamCommandStart(Tok.getLocation(), | 
 |                                   Tok.getEndLocation(), | 
 |                                   Tok.getCommandID(), | 
 |                                   CommandMarker); | 
 |   } else if (Info->IsTParamCommand) { | 
 |     TPC = S.actOnTParamCommandStart(Tok.getLocation(), | 
 |                                     Tok.getEndLocation(), | 
 |                                     Tok.getCommandID(), | 
 |                                     CommandMarker); | 
 |   } else { | 
 |     BC = S.actOnBlockCommandStart(Tok.getLocation(), | 
 |                                   Tok.getEndLocation(), | 
 |                                   Tok.getCommandID(), | 
 |                                   CommandMarker); | 
 |   } | 
 |   consumeToken(); | 
 |  | 
 |   if (isTokBlockCommand()) { | 
 |     // Block command ahead.  We can't nest block commands, so pretend that this | 
 |     // command has an empty argument. | 
 |     ParagraphComment *Paragraph = S.actOnParagraphComment({}); | 
 |     if (PC) { | 
 |       S.actOnParamCommandFinish(PC, Paragraph); | 
 |       return PC; | 
 |     } else if (TPC) { | 
 |       S.actOnTParamCommandFinish(TPC, Paragraph); | 
 |       return TPC; | 
 |     } else { | 
 |       S.actOnBlockCommandFinish(BC, Paragraph); | 
 |       return BC; | 
 |     } | 
 |   } | 
 |  | 
 |   if (PC || TPC || Info->NumArgs > 0) { | 
 |     // In order to parse command arguments we need to retokenize a few | 
 |     // following text tokens. | 
 |     TextTokenRetokenizer Retokenizer(Allocator, *this); | 
 |  | 
 |     if (PC) | 
 |       parseParamCommandArgs(PC, Retokenizer); | 
 |     else if (TPC) | 
 |       parseTParamCommandArgs(TPC, Retokenizer); | 
 |     else if (Info->IsThrowsCommand) | 
 |       S.actOnBlockCommandArgs( | 
 |           BC, parseThrowCommandArgs(Retokenizer, Info->NumArgs)); | 
 |     else if (Info->IsParCommand) | 
 |       S.actOnBlockCommandArgs(BC, | 
 |                               parseParCommandArgs(Retokenizer, Info->NumArgs)); | 
 |     else | 
 |       S.actOnBlockCommandArgs(BC, parseCommandArgs(Retokenizer, Info->NumArgs)); | 
 |  | 
 |     Retokenizer.putBackLeftoverTokens(); | 
 |   } | 
 |  | 
 |   // If there's a block command ahead, we will attach an empty paragraph to | 
 |   // this command. | 
 |   bool EmptyParagraph = false; | 
 |   if (isTokBlockCommand()) | 
 |     EmptyParagraph = true; | 
 |   else if (Tok.is(tok::newline)) { | 
 |     Token PrevTok = Tok; | 
 |     consumeToken(); | 
 |     EmptyParagraph = isTokBlockCommand(); | 
 |     putBack(PrevTok); | 
 |   } | 
 |  | 
 |   ParagraphComment *Paragraph; | 
 |   if (EmptyParagraph) | 
 |     Paragraph = S.actOnParagraphComment({}); | 
 |   else { | 
 |     BlockContentComment *Block = parseParagraphOrBlockCommand(); | 
 |     // Since we have checked for a block command, we should have parsed a | 
 |     // paragraph. | 
 |     Paragraph = cast<ParagraphComment>(Block); | 
 |   } | 
 |  | 
 |   if (PC) { | 
 |     S.actOnParamCommandFinish(PC, Paragraph); | 
 |     return PC; | 
 |   } else if (TPC) { | 
 |     S.actOnTParamCommandFinish(TPC, Paragraph); | 
 |     return TPC; | 
 |   } else { | 
 |     S.actOnBlockCommandFinish(BC, Paragraph); | 
 |     return BC; | 
 |   } | 
 | } | 
 |  | 
 | InlineCommandComment *Parser::parseInlineCommand() { | 
 |   assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command)); | 
 |   const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID()); | 
 |  | 
 |   const Token CommandTok = Tok; | 
 |   consumeToken(); | 
 |  | 
 |   TextTokenRetokenizer Retokenizer(Allocator, *this); | 
 |   ArrayRef<Comment::Argument> Args = | 
 |       parseCommandArgs(Retokenizer, Info->NumArgs); | 
 |  | 
 |   InlineCommandComment *IC = S.actOnInlineCommand( | 
 |       CommandTok.getLocation(), CommandTok.getEndLocation(), | 
 |       CommandTok.getCommandID(), Args); | 
 |  | 
 |   if (Args.size() < Info->NumArgs) { | 
 |     Diag(CommandTok.getEndLocation().getLocWithOffset(1), | 
 |          diag::warn_doc_inline_command_not_enough_arguments) | 
 |         << CommandTok.is(tok::at_command) << Info->Name << Args.size() | 
 |         << Info->NumArgs | 
 |         << SourceRange(CommandTok.getLocation(), CommandTok.getEndLocation()); | 
 |   } | 
 |  | 
 |   Retokenizer.putBackLeftoverTokens(); | 
 |  | 
 |   return IC; | 
 | } | 
 |  | 
 | HTMLStartTagComment *Parser::parseHTMLStartTag() { | 
 |   assert(Tok.is(tok::html_start_tag)); | 
 |   HTMLStartTagComment *HST = | 
 |       S.actOnHTMLStartTagStart(Tok.getLocation(), | 
 |                                Tok.getHTMLTagStartName()); | 
 |   consumeToken(); | 
 |  | 
 |   SmallVector<HTMLStartTagComment::Attribute, 2> Attrs; | 
 |   while (true) { | 
 |     switch (Tok.getKind()) { | 
 |     case tok::html_ident: { | 
 |       Token Ident = Tok; | 
 |       consumeToken(); | 
 |       if (Tok.isNot(tok::html_equals)) { | 
 |         Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(), | 
 |                                                        Ident.getHTMLIdent())); | 
 |         continue; | 
 |       } | 
 |       Token Equals = Tok; | 
 |       consumeToken(); | 
 |       if (Tok.isNot(tok::html_quoted_string)) { | 
 |         Diag(Tok.getLocation(), | 
 |              diag::warn_doc_html_start_tag_expected_quoted_string) | 
 |           << SourceRange(Equals.getLocation()); | 
 |         Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(), | 
 |                                                        Ident.getHTMLIdent())); | 
 |         while (Tok.is(tok::html_equals) || | 
 |                Tok.is(tok::html_quoted_string)) | 
 |           consumeToken(); | 
 |         continue; | 
 |       } | 
 |       Attrs.push_back(HTMLStartTagComment::Attribute( | 
 |                               Ident.getLocation(), | 
 |                               Ident.getHTMLIdent(), | 
 |                               Equals.getLocation(), | 
 |                               SourceRange(Tok.getLocation(), | 
 |                                           Tok.getEndLocation()), | 
 |                               Tok.getHTMLQuotedString())); | 
 |       consumeToken(); | 
 |       continue; | 
 |     } | 
 |  | 
 |     case tok::html_greater: | 
 |       S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)), | 
 |                                 Tok.getLocation(), | 
 |                                 /* IsSelfClosing = */ false); | 
 |       consumeToken(); | 
 |       return HST; | 
 |  | 
 |     case tok::html_slash_greater: | 
 |       S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)), | 
 |                                 Tok.getLocation(), | 
 |                                 /* IsSelfClosing = */ true); | 
 |       consumeToken(); | 
 |       return HST; | 
 |  | 
 |     case tok::html_equals: | 
 |     case tok::html_quoted_string: | 
 |       Diag(Tok.getLocation(), | 
 |            diag::warn_doc_html_start_tag_expected_ident_or_greater); | 
 |       while (Tok.is(tok::html_equals) || | 
 |              Tok.is(tok::html_quoted_string)) | 
 |         consumeToken(); | 
 |       if (Tok.is(tok::html_ident) || | 
 |           Tok.is(tok::html_greater) || | 
 |           Tok.is(tok::html_slash_greater)) | 
 |         continue; | 
 |  | 
 |       S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)), | 
 |                                 SourceLocation(), | 
 |                                 /* IsSelfClosing = */ false); | 
 |       return HST; | 
 |  | 
 |     default: | 
 |       // Not a token from an HTML start tag.  Thus HTML tag prematurely ended. | 
 |       S.actOnHTMLStartTagFinish(HST, S.copyArray(llvm::ArrayRef(Attrs)), | 
 |                                 SourceLocation(), | 
 |                                 /* IsSelfClosing = */ false); | 
 |       bool StartLineInvalid; | 
 |       const unsigned StartLine = SourceMgr.getPresumedLineNumber( | 
 |                                                   HST->getLocation(), | 
 |                                                   &StartLineInvalid); | 
 |       bool EndLineInvalid; | 
 |       const unsigned EndLine = SourceMgr.getPresumedLineNumber( | 
 |                                                   Tok.getLocation(), | 
 |                                                   &EndLineInvalid); | 
 |       if (StartLineInvalid || EndLineInvalid || StartLine == EndLine) | 
 |         Diag(Tok.getLocation(), | 
 |              diag::warn_doc_html_start_tag_expected_ident_or_greater) | 
 |           << HST->getSourceRange(); | 
 |       else { | 
 |         Diag(Tok.getLocation(), | 
 |              diag::warn_doc_html_start_tag_expected_ident_or_greater); | 
 |         Diag(HST->getLocation(), diag::note_doc_html_tag_started_here) | 
 |           << HST->getSourceRange(); | 
 |       } | 
 |       return HST; | 
 |     } | 
 |   } | 
 | } | 
 |  | 
 | HTMLEndTagComment *Parser::parseHTMLEndTag() { | 
 |   assert(Tok.is(tok::html_end_tag)); | 
 |   Token TokEndTag = Tok; | 
 |   consumeToken(); | 
 |   SourceLocation Loc; | 
 |   if (Tok.is(tok::html_greater)) { | 
 |     Loc = Tok.getLocation(); | 
 |     consumeToken(); | 
 |   } | 
 |  | 
 |   return S.actOnHTMLEndTag(TokEndTag.getLocation(), | 
 |                            Loc, | 
 |                            TokEndTag.getHTMLTagEndName()); | 
 | } | 
 |  | 
 | BlockContentComment *Parser::parseParagraphOrBlockCommand() { | 
 |   SmallVector<InlineContentComment *, 8> Content; | 
 |  | 
 |   while (true) { | 
 |     switch (Tok.getKind()) { | 
 |     case tok::verbatim_block_begin: | 
 |     case tok::verbatim_line_name: | 
 |     case tok::eof: | 
 |       break; // Block content or EOF ahead, finish this parapgaph. | 
 |  | 
 |     case tok::unknown_command: | 
 |       Content.push_back(S.actOnUnknownCommand(Tok.getLocation(), | 
 |                                               Tok.getEndLocation(), | 
 |                                               Tok.getUnknownCommandName())); | 
 |       consumeToken(); | 
 |       continue; | 
 |  | 
 |     case tok::backslash_command: | 
 |     case tok::at_command: { | 
 |       const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID()); | 
 |       if (Info->IsBlockCommand) { | 
 |         if (Content.size() == 0) | 
 |           return parseBlockCommand(); | 
 |         break; // Block command ahead, finish this parapgaph. | 
 |       } | 
 |       if (Info->IsVerbatimBlockEndCommand) { | 
 |         Diag(Tok.getLocation(), | 
 |              diag::warn_verbatim_block_end_without_start) | 
 |           << Tok.is(tok::at_command) | 
 |           << Info->Name | 
 |           << SourceRange(Tok.getLocation(), Tok.getEndLocation()); | 
 |         consumeToken(); | 
 |         continue; | 
 |       } | 
 |       if (Info->IsUnknownCommand) { | 
 |         Content.push_back(S.actOnUnknownCommand(Tok.getLocation(), | 
 |                                                 Tok.getEndLocation(), | 
 |                                                 Info->getID())); | 
 |         consumeToken(); | 
 |         continue; | 
 |       } | 
 |       assert(Info->IsInlineCommand); | 
 |       Content.push_back(parseInlineCommand()); | 
 |       continue; | 
 |     } | 
 |  | 
 |     case tok::newline: { | 
 |       consumeToken(); | 
 |       if (Tok.is(tok::newline) || Tok.is(tok::eof)) { | 
 |         consumeToken(); | 
 |         break; // Two newlines -- end of paragraph. | 
 |       } | 
 |       // Also allow [tok::newline, tok::text, tok::newline] if the middle | 
 |       // tok::text is just whitespace. | 
 |       if (Tok.is(tok::text) && isWhitespace(Tok.getText())) { | 
 |         Token WhitespaceTok = Tok; | 
 |         consumeToken(); | 
 |         if (Tok.is(tok::newline) || Tok.is(tok::eof)) { | 
 |           consumeToken(); | 
 |           break; | 
 |         } | 
 |         // We have [tok::newline, tok::text, non-newline].  Put back tok::text. | 
 |         putBack(WhitespaceTok); | 
 |       } | 
 |       if (Content.size() > 0) | 
 |         Content.back()->addTrailingNewline(); | 
 |       continue; | 
 |     } | 
 |  | 
 |     // Don't deal with HTML tag soup now. | 
 |     case tok::html_start_tag: | 
 |       Content.push_back(parseHTMLStartTag()); | 
 |       continue; | 
 |  | 
 |     case tok::html_end_tag: | 
 |       Content.push_back(parseHTMLEndTag()); | 
 |       continue; | 
 |  | 
 |     case tok::text: | 
 |       Content.push_back(S.actOnText(Tok.getLocation(), | 
 |                                     Tok.getEndLocation(), | 
 |                                     Tok.getText())); | 
 |       consumeToken(); | 
 |       continue; | 
 |  | 
 |     case tok::verbatim_block_line: | 
 |     case tok::verbatim_block_end: | 
 |     case tok::verbatim_line_text: | 
 |     case tok::html_ident: | 
 |     case tok::html_equals: | 
 |     case tok::html_quoted_string: | 
 |     case tok::html_greater: | 
 |     case tok::html_slash_greater: | 
 |       llvm_unreachable("should not see this token"); | 
 |     } | 
 |     break; | 
 |   } | 
 |  | 
 |   return S.actOnParagraphComment(S.copyArray(llvm::ArrayRef(Content))); | 
 | } | 
 |  | 
 | VerbatimBlockComment *Parser::parseVerbatimBlock() { | 
 |   assert(Tok.is(tok::verbatim_block_begin)); | 
 |  | 
 |   VerbatimBlockComment *VB = | 
 |       S.actOnVerbatimBlockStart(Tok.getLocation(), | 
 |                                 Tok.getVerbatimBlockID()); | 
 |   consumeToken(); | 
 |  | 
 |   // Don't create an empty line if verbatim opening command is followed | 
 |   // by a newline. | 
 |   if (Tok.is(tok::newline)) | 
 |     consumeToken(); | 
 |  | 
 |   SmallVector<VerbatimBlockLineComment *, 8> Lines; | 
 |   while (Tok.is(tok::verbatim_block_line) || | 
 |          Tok.is(tok::newline)) { | 
 |     VerbatimBlockLineComment *Line; | 
 |     if (Tok.is(tok::verbatim_block_line)) { | 
 |       Line = S.actOnVerbatimBlockLine(Tok.getLocation(), | 
 |                                       Tok.getVerbatimBlockText()); | 
 |       consumeToken(); | 
 |       if (Tok.is(tok::newline)) { | 
 |         consumeToken(); | 
 |       } | 
 |     } else { | 
 |       // Empty line, just a tok::newline. | 
 |       Line = S.actOnVerbatimBlockLine(Tok.getLocation(), ""); | 
 |       consumeToken(); | 
 |     } | 
 |     Lines.push_back(Line); | 
 |   } | 
 |  | 
 |   if (Tok.is(tok::verbatim_block_end)) { | 
 |     const CommandInfo *Info = Traits.getCommandInfo(Tok.getVerbatimBlockID()); | 
 |     S.actOnVerbatimBlockFinish(VB, Tok.getLocation(), Info->Name, | 
 |                                S.copyArray(llvm::ArrayRef(Lines))); | 
 |     consumeToken(); | 
 |   } else { | 
 |     // Unterminated \\verbatim block | 
 |     S.actOnVerbatimBlockFinish(VB, SourceLocation(), "", | 
 |                                S.copyArray(llvm::ArrayRef(Lines))); | 
 |   } | 
 |  | 
 |   return VB; | 
 | } | 
 |  | 
 | VerbatimLineComment *Parser::parseVerbatimLine() { | 
 |   assert(Tok.is(tok::verbatim_line_name)); | 
 |  | 
 |   Token NameTok = Tok; | 
 |   consumeToken(); | 
 |  | 
 |   SourceLocation TextBegin; | 
 |   StringRef Text; | 
 |   // Next token might not be a tok::verbatim_line_text if verbatim line | 
 |   // starting command comes just before a newline or comment end. | 
 |   if (Tok.is(tok::verbatim_line_text)) { | 
 |     TextBegin = Tok.getLocation(); | 
 |     Text = Tok.getVerbatimLineText(); | 
 |   } else { | 
 |     TextBegin = NameTok.getEndLocation(); | 
 |     Text = ""; | 
 |   } | 
 |  | 
 |   VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(), | 
 |                                                 NameTok.getVerbatimLineID(), | 
 |                                                 TextBegin, | 
 |                                                 Text); | 
 |   consumeToken(); | 
 |   return VL; | 
 | } | 
 |  | 
 | BlockContentComment *Parser::parseBlockContent() { | 
 |   switch (Tok.getKind()) { | 
 |   case tok::text: | 
 |   case tok::unknown_command: | 
 |   case tok::backslash_command: | 
 |   case tok::at_command: | 
 |   case tok::html_start_tag: | 
 |   case tok::html_end_tag: | 
 |     return parseParagraphOrBlockCommand(); | 
 |  | 
 |   case tok::verbatim_block_begin: | 
 |     return parseVerbatimBlock(); | 
 |  | 
 |   case tok::verbatim_line_name: | 
 |     return parseVerbatimLine(); | 
 |  | 
 |   case tok::eof: | 
 |   case tok::newline: | 
 |   case tok::verbatim_block_line: | 
 |   case tok::verbatim_block_end: | 
 |   case tok::verbatim_line_text: | 
 |   case tok::html_ident: | 
 |   case tok::html_equals: | 
 |   case tok::html_quoted_string: | 
 |   case tok::html_greater: | 
 |   case tok::html_slash_greater: | 
 |     llvm_unreachable("should not see this token"); | 
 |   } | 
 |   llvm_unreachable("bogus token kind"); | 
 | } | 
 |  | 
 | FullComment *Parser::parseFullComment() { | 
 |   // Skip newlines at the beginning of the comment. | 
 |   while (Tok.is(tok::newline)) | 
 |     consumeToken(); | 
 |  | 
 |   SmallVector<BlockContentComment *, 8> Blocks; | 
 |   while (Tok.isNot(tok::eof)) { | 
 |     Blocks.push_back(parseBlockContent()); | 
 |  | 
 |     // Skip extra newlines after paragraph end. | 
 |     while (Tok.is(tok::newline)) | 
 |       consumeToken(); | 
 |   } | 
 |   return S.actOnFullComment(S.copyArray(llvm::ArrayRef(Blocks))); | 
 | } | 
 |  | 
 | } // end namespace comments | 
 | } // end namespace clang |