| //===--- LRGraph.cpp - -------------------------------------------*- C++-*-===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "clang-pseudo/grammar/LRGraph.h" |
| #include "clang-pseudo/grammar/Grammar.h" |
| #include "llvm/ADT/DenseSet.h" |
| #include "llvm/ADT/Hashing.h" |
| #include "llvm/ADT/STLExtras.h" |
| #include "llvm/ADT/StringExtras.h" |
| #include "llvm/Support/FormatVariadic.h" |
| #include "llvm/Support/raw_ostream.h" |
| |
| using ItemSet = std::vector<clang::pseudo::Item>; |
| |
| namespace llvm { |
| // Support clang::pseudo::Item as DenseMap keys. |
| template <> struct DenseMapInfo<ItemSet> { |
| static inline ItemSet getEmptyKey() { |
| return {DenseMapInfo<clang::pseudo::Item>::getEmptyKey()}; |
| } |
| static inline ItemSet getTombstoneKey() { |
| return {DenseMapInfo<clang::pseudo::Item>::getTombstoneKey()}; |
| } |
| static unsigned getHashValue(const ItemSet &I) { |
| return llvm::hash_combine_range(I.begin(), I.end()); |
| } |
| static bool isEqual(const ItemSet &LHS, const ItemSet &RHS) { |
| return LHS == RHS; |
| } |
| }; |
| } // namespace llvm |
| |
| namespace clang { |
| namespace pseudo { |
| namespace { |
| |
| struct SortByNextSymbol { |
| SortByNextSymbol(const Grammar &G) : G(G) {} |
| bool operator()(const Item &L, const Item &R) { |
| if (L.hasNext() && R.hasNext() && L.next(G) != R.next(G)) |
| return L.next(G) < R.next(G); |
| if (L.hasNext() != R.hasNext()) |
| return L.hasNext() < R.hasNext(); // a trailing dot is minimal. |
| return L < R; |
| } |
| const Grammar &G; |
| }; |
| |
| // Computes a closure of the given item set S: |
| // - extends the given S to contain all options for parsing next token; |
| // - nonterminals after a dot are recursively expanded into the begin-state |
| // of all production rules that produce that nonterminal; |
| // |
| // Given |
| // Grammar rules = [ _ := E, E := E - T, E := T, T := n, T := ( E ) ] |
| // Input = [ E := . T ] |
| // returns [ E := . T, T := . n, T := . ( E ) ] |
| State closure(ItemSet Queue, const Grammar &G) { |
| llvm::DenseSet<Item> InQueue = {Queue.begin(), Queue.end()}; |
| // We reuse the passed-by-value Queue as the final result, as it's already |
| // initialized to the right elements. |
| size_t ItIndex = 0; |
| while (ItIndex < Queue.size()) { |
| const Item &ExpandingItem = Queue[ItIndex]; |
| ++ItIndex; |
| if (!ExpandingItem.hasNext()) |
| continue; |
| |
| SymbolID NextSym = ExpandingItem.next(G); |
| if (pseudo::isToken(NextSym)) |
| continue; |
| auto RRange = G.table().Nonterminals[NextSym].RuleRange; |
| for (RuleID RID = RRange.Start; RID < RRange.End; ++RID) { |
| Item NewItem = Item::start(RID, G); |
| if (InQueue.insert(NewItem).second) // new |
| Queue.push_back(std::move(NewItem)); |
| } |
| } |
| Queue.shrink_to_fit(); |
| llvm::sort(Queue, SortByNextSymbol(G)); |
| return {std::move(Queue)}; |
| } |
| |
| // Returns all next (with a dot advanced) kernel item sets, partitioned by the |
| // advanced symbol. |
| // |
| // Given |
| // S = [ E := . a b, E := E . - T ] |
| // returns [ |
| // {id(a), [ E := a . b ]}, |
| // {id(-), [ E := E - . T ]} |
| // ] |
| std::vector<std::pair<SymbolID, ItemSet>> |
| nextAvailableKernelItems(const State &S, const Grammar &G) { |
| std::vector<std::pair<SymbolID, ItemSet>> Results; |
| llvm::ArrayRef<Item> AllItems = S.Items; |
| AllItems = AllItems.drop_while([](const Item &I) { return !I.hasNext(); }); |
| while (!AllItems.empty()) { |
| SymbolID AdvancedSymbol = AllItems.front().next(G); |
| auto Batch = AllItems.take_while([AdvancedSymbol, &G](const Item &I) { |
| assert(I.hasNext()); |
| return I.next(G) == AdvancedSymbol; |
| }); |
| assert(!Batch.empty()); |
| AllItems = AllItems.drop_front(Batch.size()); |
| |
| // Advance a dot over the Symbol. |
| ItemSet Next; |
| for (const Item &I : Batch) |
| Next.push_back(I.advance()); |
| // sort the set to keep order determinism for hash computation. |
| llvm::sort(Next); |
| Results.push_back({AdvancedSymbol, std::move(Next)}); |
| } |
| return Results; |
| } |
| |
| std::vector<std::pair<ExtensionID, SymbolID>> |
| availableRecovery(const State &S, const Grammar &G) { |
| std::vector<std::pair<ExtensionID, SymbolID>> Result; |
| for (const Item &I : S.Items) { |
| const auto &Rule = G.lookupRule(I.rule()); |
| if (I.dot() != Rule.RecoveryIndex) |
| continue; |
| Result.push_back({Rule.Recovery, Rule.seq()[Rule.RecoveryIndex]}); |
| } |
| llvm::sort(Result); |
| Result.erase(std::unique(Result.begin(), Result.end()), Result.end()); |
| return Result; |
| } |
| |
| } // namespace |
| |
| std::string Item::dump(const Grammar &G) const { |
| const auto &Rule = G.lookupRule(RID); |
| auto ToNames = [&](llvm::ArrayRef<SymbolID> Syms) { |
| std::vector<llvm::StringRef> Results; |
| for (auto SID : Syms) |
| Results.push_back(G.symbolName(SID)); |
| return Results; |
| }; |
| return llvm::formatv("{0} := {1} • {2}{3}", G.symbolName(Rule.Target), |
| llvm::join(ToNames(Rule.seq().take_front(DotPos)), " "), |
| llvm::join(ToNames(Rule.seq().drop_front(DotPos)), " "), |
| Rule.RecoveryIndex == DotPos ? " [recovery]" : "") |
| .str(); |
| } |
| |
| std::string State::dump(const Grammar &G, unsigned Indent) const { |
| std::string Result; |
| llvm::raw_string_ostream OS(Result); |
| for (const auto &Item : Items) |
| OS.indent(Indent) << llvm::formatv("{0}\n", Item.dump(G)); |
| return OS.str(); |
| } |
| |
| std::string LRGraph::dumpForTests(const Grammar &G) const { |
| std::string Result; |
| llvm::raw_string_ostream OS(Result); |
| OS << "States:\n"; |
| for (StateID ID = 0; ID < States.size(); ++ID) { |
| OS << llvm::formatv("State {0}\n", ID); |
| OS << States[ID].dump(G, /*Indent*/ 4); |
| } |
| for (const auto &E : Edges) { |
| OS << llvm::formatv("{0} ->[{1}] {2}\n", E.Src, G.symbolName(E.Label), |
| E.Dst); |
| } |
| return OS.str(); |
| } |
| |
| LRGraph LRGraph::buildLR0(const Grammar &G) { |
| class Builder { |
| public: |
| Builder(const Grammar &G) : G(G) {} |
| |
| // Adds a given state if not existed. |
| std::pair<StateID, /*inserted*/ bool> insert(ItemSet KernelItems) { |
| assert(llvm::is_sorted(KernelItems) && |
| "Item must be sorted before inserting to a hash map!"); |
| auto It = StatesIndex.find(KernelItems); |
| if (It != StatesIndex.end()) |
| return {It->second, false}; |
| States.push_back(closure(KernelItems, G)); |
| StateID NextStateID = States.size() - 1; |
| StatesIndex.insert({std::move(KernelItems), NextStateID}); |
| return {NextStateID, true}; |
| } |
| |
| void insertEdge(StateID Src, StateID Dst, SymbolID Label) { |
| Edges.push_back({Src, Dst, Label}); |
| } |
| |
| void insertRecovery(StateID Src, ExtensionID Strategy, SymbolID Result) { |
| Recoveries.push_back({Src, Strategy, Result}); |
| } |
| |
| // Returns a state with the given id. |
| const State &find(StateID ID) const { |
| assert(ID < States.size()); |
| return States[ID]; |
| } |
| |
| void addStartState(SymbolID Sym, StateID State) { |
| StartStates.push_back({Sym, State}); |
| } |
| |
| LRGraph build() && { |
| States.shrink_to_fit(); |
| Edges.shrink_to_fit(); |
| Recoveries.shrink_to_fit(); |
| llvm::sort(StartStates); |
| StartStates.shrink_to_fit(); |
| return LRGraph(std::move(States), std::move(Edges), std::move(Recoveries), |
| std::move(StartStates)); |
| } |
| |
| private: |
| // Key is the **kernel** item sets. |
| llvm::DenseMap<ItemSet, /*index of States*/ size_t> StatesIndex; |
| std::vector<State> States; |
| std::vector<Edge> Edges; |
| std::vector<Recovery> Recoveries; |
| const Grammar &G; |
| std::vector<std::pair<SymbolID, StateID>> StartStates; |
| } Builder(G); |
| |
| std::vector<StateID> PendingStates; |
| // Initialize states with the start symbol. |
| auto RRange = G.table().Nonterminals[G.underscore()].RuleRange; |
| for (RuleID RID = RRange.Start; RID < RRange.End; ++RID) { |
| auto StartState = std::vector<Item>{Item::start(RID, G)}; |
| auto Result = Builder.insert(std::move(StartState)); |
| assert(Result.second && "State must be new"); |
| PendingStates.push_back(Result.first); |
| |
| const Rule &StartRule = G.lookupRule(RID); |
| assert(StartRule.Size == 2 && |
| StartRule.seq().back() == tokenSymbol(tok::eof) && |
| "Start rule must be of the form `_ := start-symbol EOF`!"); |
| Builder.addStartState(StartRule.seq().front(), Result.first); |
| } |
| |
| while (!PendingStates.empty()) { |
| auto StateID = PendingStates.back(); |
| PendingStates.pop_back(); |
| for (auto Next : nextAvailableKernelItems(Builder.find(StateID), G)) { |
| auto Insert = Builder.insert(Next.second); |
| if (Insert.second) // new state, insert to the pending queue. |
| PendingStates.push_back(Insert.first); |
| Builder.insertEdge(StateID, Insert.first, Next.first); |
| } |
| for (auto Recovery : availableRecovery(Builder.find(StateID), G)) |
| Builder.insertRecovery(StateID, Recovery.first, Recovery.second); |
| } |
| return std::move(Builder).build(); |
| } |
| |
| } // namespace pseudo |
| } // namespace clang |