|  | //===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===// | 
|  | // | 
|  | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | // See https://llvm.org/LICENSE.txt for license information. | 
|  | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | /// | 
|  | /// \file | 
|  | /// This file defines the log symbolizer markup data model and parser. | 
|  | /// | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #include "llvm/DebugInfo/Symbolize/Markup.h" | 
|  |  | 
|  | #include "llvm/ADT/STLExtras.h" | 
|  | #include "llvm/ADT/StringExtras.h" | 
|  |  | 
|  | namespace llvm { | 
|  | namespace symbolize { | 
|  |  | 
// Regular expression for the SGR control sequences recognized in text that
// lies outside markup elements. It accepts exactly:
//   "\033[0m"
//   "\033[1m"
//   "\033[30m" through "\033[37m"
static constexpr char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m";
|  |  | 
|  | MarkupParser::MarkupParser(StringSet<> MultilineTags) | 
|  | : MultilineTags(std::move(MultilineTags)), SGRSyntax(SGRSyntaxStr) {} | 
|  |  | 
|  | static StringRef takeTo(StringRef Str, StringRef::iterator Pos) { | 
|  | return Str.take_front(Pos - Str.begin()); | 
|  | } | 
|  | static void advanceTo(StringRef &Str, StringRef::iterator Pos) { | 
|  | Str = Str.drop_front(Pos - Str.begin()); | 
|  | } | 
|  |  | 
|  | void MarkupParser::parseLine(StringRef Line) { | 
|  | Buffer.clear(); | 
|  | NextIdx = 0; | 
|  | FinishedMultiline.clear(); | 
|  | this->Line = Line; | 
|  | } | 
|  |  | 
|  | Optional<MarkupNode> MarkupParser::nextNode() { | 
|  | // Pull something out of the buffer if possible. | 
|  | if (!Buffer.empty()) { | 
|  | if (NextIdx < Buffer.size()) | 
|  | return std::move(Buffer[NextIdx++]); | 
|  | NextIdx = 0; | 
|  | Buffer.clear(); | 
|  | } | 
|  |  | 
|  | // The buffer is empty, so parse the next bit of the line. | 
|  |  | 
|  | if (Line.empty()) | 
|  | return None; | 
|  |  | 
|  | if (!InProgressMultiline.empty()) { | 
|  | if (Optional<StringRef> MultilineEnd = parseMultiLineEnd(Line)) { | 
|  | llvm::append_range(InProgressMultiline, *MultilineEnd); | 
|  | assert(FinishedMultiline.empty() && | 
|  | "At most one multi-line element can be finished at a time."); | 
|  | FinishedMultiline.swap(InProgressMultiline); | 
|  | // Parse the multi-line element as if it were contiguous. | 
|  | advanceTo(Line, MultilineEnd->end()); | 
|  | return *parseElement(FinishedMultiline); | 
|  | } | 
|  |  | 
|  | // The whole line is part of the multi-line element. | 
|  | llvm::append_range(InProgressMultiline, Line); | 
|  | Line = Line.drop_front(Line.size()); | 
|  | return None; | 
|  | } | 
|  |  | 
|  | // Find the first valid markup element, if any. | 
|  | if (Optional<MarkupNode> Element = parseElement(Line)) { | 
|  | parseTextOutsideMarkup(takeTo(Line, Element->Text.begin())); | 
|  | Buffer.push_back(std::move(*Element)); | 
|  | advanceTo(Line, Element->Text.end()); | 
|  | return nextNode(); | 
|  | } | 
|  |  | 
|  | // Since there were no valid elements remaining, see if the line opens a | 
|  | // multi-line element. | 
|  | if (Optional<StringRef> MultilineBegin = parseMultiLineBegin(Line)) { | 
|  | // Emit any text before the element. | 
|  | parseTextOutsideMarkup(takeTo(Line, MultilineBegin->begin())); | 
|  |  | 
|  | // Begin recording the multi-line element. | 
|  | llvm::append_range(InProgressMultiline, *MultilineBegin); | 
|  | Line = Line.drop_front(Line.size()); | 
|  | return nextNode(); | 
|  | } | 
|  |  | 
|  | // The line doesn't contain any more markup elements, so emit it as text. | 
|  | parseTextOutsideMarkup(Line); | 
|  | Line = Line.drop_front(Line.size()); | 
|  | return nextNode(); | 
|  | } | 
|  |  | 
|  | void MarkupParser::flush() { | 
|  | Buffer.clear(); | 
|  | NextIdx = 0; | 
|  | Line = {}; | 
|  | if (InProgressMultiline.empty()) | 
|  | return; | 
|  | FinishedMultiline.swap(InProgressMultiline); | 
|  | parseTextOutsideMarkup(FinishedMultiline); | 
|  | } | 
|  |  | 
|  | // Finds and returns the next valid markup element in the given line. Returns | 
|  | // None if the line contains no valid elements. | 
|  | Optional<MarkupNode> MarkupParser::parseElement(StringRef Line) { | 
|  | while (true) { | 
|  | // Find next element using begin and end markers. | 
|  | size_t BeginPos = Line.find("{{{"); | 
|  | if (BeginPos == StringRef::npos) | 
|  | return None; | 
|  | size_t EndPos = Line.find("}}}", BeginPos + 3); | 
|  | if (EndPos == StringRef::npos) | 
|  | return None; | 
|  | EndPos += 3; | 
|  | MarkupNode Element; | 
|  | Element.Text = Line.slice(BeginPos, EndPos); | 
|  | Line = Line.substr(EndPos); | 
|  |  | 
|  | // Parse tag. | 
|  | StringRef Content = Element.Text.drop_front(3).drop_back(3); | 
|  | StringRef FieldsContent; | 
|  | std::tie(Element.Tag, FieldsContent) = Content.split(':'); | 
|  | if (Element.Tag.empty()) | 
|  | continue; | 
|  |  | 
|  | // Parse fields. | 
|  | if (!FieldsContent.empty()) | 
|  | FieldsContent.split(Element.Fields, ":"); | 
|  | else if (Content.back() == ':') | 
|  | Element.Fields.push_back(FieldsContent); | 
|  |  | 
|  | return Element; | 
|  | } | 
|  | } | 
|  |  | 
|  | static MarkupNode textNode(StringRef Text) { | 
|  | MarkupNode Node; | 
|  | Node.Text = Text; | 
|  | return Node; | 
|  | } | 
|  |  | 
|  | // Parses a region of text known to be outside any markup elements. Such text | 
|  | // may still contain SGR control codes, so the region is further subdivided into | 
|  | // control codes and true text regions. | 
|  | void MarkupParser::parseTextOutsideMarkup(StringRef Text) { | 
|  | if (Text.empty()) | 
|  | return; | 
|  | SmallVector<StringRef> Matches; | 
|  | while (SGRSyntax.match(Text, &Matches)) { | 
|  | // Emit any text before the SGR element. | 
|  | if (Matches.begin()->begin() != Text.begin()) | 
|  | Buffer.push_back(textNode(takeTo(Text, Matches.begin()->begin()))); | 
|  |  | 
|  | Buffer.push_back(textNode(*Matches.begin())); | 
|  | advanceTo(Text, Matches.begin()->end()); | 
|  | } | 
|  | if (!Text.empty()) | 
|  | Buffer.push_back(textNode(Text)); | 
|  | } | 
|  |  | 
|  | // Given that a line doesn't contain any valid markup, see if it ends with the | 
|  | // start of a multi-line element. If so, returns the beginning. | 
|  | Optional<StringRef> MarkupParser::parseMultiLineBegin(StringRef Line) { | 
|  | // A multi-line begin marker must be the last one on the line. | 
|  | size_t BeginPos = Line.rfind("{{{"); | 
|  | if (BeginPos == StringRef::npos) | 
|  | return None; | 
|  | size_t BeginTagPos = BeginPos + 3; | 
|  |  | 
|  | // If there are any end markers afterwards, the begin marker cannot belong to | 
|  | // a multi-line element. | 
|  | size_t EndPos = Line.find("}}}", BeginTagPos); | 
|  | if (EndPos != StringRef::npos) | 
|  | return None; | 
|  |  | 
|  | // Check whether the tag is registered multi-line. | 
|  | size_t EndTagPos = Line.find(':', BeginTagPos); | 
|  | if (EndTagPos == StringRef::npos) | 
|  | return None; | 
|  | StringRef Tag = Line.slice(BeginTagPos, EndTagPos); | 
|  | if (!MultilineTags.contains(Tag)) | 
|  | return None; | 
|  | return Line.substr(BeginPos); | 
|  | } | 
|  |  | 
|  | // See if the line begins with the ending of an in-progress multi-line element. | 
|  | // If so, return the ending. | 
|  | Optional<StringRef> MarkupParser::parseMultiLineEnd(StringRef Line) { | 
|  | size_t EndPos = Line.find("}}}"); | 
|  | if (EndPos == StringRef::npos) | 
|  | return None; | 
|  | return Line.take_front(EndPos + 3); | 
|  | } | 
|  |  | 
|  | } // end namespace symbolize | 
|  | } // end namespace llvm |