|  | //===-- Background.cpp - Build an index in a background thread ------------===// | 
|  | // | 
|  | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
|  | // See https://llvm.org/LICENSE.txt for license information. | 
|  | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #include "index/Background.h" | 
|  | #include "Compiler.h" | 
|  | #include "Config.h" | 
|  | #include "Headers.h" | 
|  | #include "ParsedAST.h" | 
|  | #include "SourceCode.h" | 
|  | #include "Symbol.h" | 
|  | #include "URI.h" | 
|  | #include "index/BackgroundIndexLoader.h" | 
|  | #include "index/FileIndex.h" | 
|  | #include "index/Index.h" | 
|  | #include "index/IndexAction.h" | 
|  | #include "index/MemIndex.h" | 
|  | #include "index/Ref.h" | 
|  | #include "index/Relation.h" | 
|  | #include "index/Serialization.h" | 
|  | #include "index/SymbolCollector.h" | 
|  | #include "support/Context.h" | 
|  | #include "support/Logger.h" | 
|  | #include "support/Path.h" | 
|  | #include "support/Threading.h" | 
|  | #include "support/ThreadsafeFS.h" | 
|  | #include "support/Trace.h" | 
|  | #include "clang/Basic/SourceLocation.h" | 
|  | #include "clang/Basic/SourceManager.h" | 
|  | #include "clang/Driver/Types.h" | 
|  | #include "llvm/ADT/ArrayRef.h" | 
|  | #include "llvm/ADT/DenseSet.h" | 
|  | #include "llvm/ADT/Hashing.h" | 
|  | #include "llvm/ADT/STLExtras.h" | 
|  | #include "llvm/ADT/ScopeExit.h" | 
|  | #include "llvm/ADT/StringMap.h" | 
|  | #include "llvm/ADT/StringRef.h" | 
|  | #include "llvm/ADT/StringSet.h" | 
|  | #include "llvm/Support/Error.h" | 
|  | #include "llvm/Support/Path.h" | 
|  | #include "llvm/Support/Threading.h" | 
|  | #include "llvm/Support/xxhash.h" | 
|  |  | 
|  | #include <algorithm> | 
|  | #include <atomic> | 
|  | #include <chrono> | 
|  | #include <condition_variable> | 
|  | #include <cstddef> | 
|  | #include <memory> | 
|  | #include <mutex> | 
|  | #include <numeric> | 
|  | #include <queue> | 
|  | #include <random> | 
|  | #include <string> | 
|  | #include <thread> | 
|  | #include <utility> | 
|  | #include <vector> | 
|  |  | 
|  | namespace clang { | 
|  | namespace clangd { | 
|  | namespace { | 
|  |  | 
|  | // We cannot use vfs->makeAbsolute because Cmd.FileName is either absolute or | 
|  | // relative to Cmd.Directory, which might not be the same as current working | 
|  | // directory. | 
|  | llvm::SmallString<128> getAbsolutePath(const tooling::CompileCommand &Cmd) { | 
|  | llvm::SmallString<128> AbsolutePath; | 
|  | if (llvm::sys::path::is_absolute(Cmd.Filename)) { | 
|  | AbsolutePath = Cmd.Filename; | 
|  | } else { | 
|  | AbsolutePath = Cmd.Directory; | 
|  | llvm::sys::path::append(AbsolutePath, Cmd.Filename); | 
|  | llvm::sys::path::remove_dots(AbsolutePath, true); | 
|  | } | 
|  | return AbsolutePath; | 
|  | } | 
|  |  | 
|  | bool shardIsStale(const LoadedShard &LS, llvm::vfs::FileSystem *FS) { | 
|  | auto Buf = FS->getBufferForFile(LS.AbsolutePath); | 
|  | if (!Buf) { | 
|  | vlog("Background-index: Couldn't read {0} to validate stored index: {1}", | 
|  | LS.AbsolutePath, Buf.getError().message()); | 
|  | // There is no point in indexing an unreadable file. | 
|  | return false; | 
|  | } | 
|  | return digest(Buf->get()->getBuffer()) != LS.Digest; | 
|  | } | 
|  |  | 
|  | } // namespace | 
|  |  | 
|  | BackgroundIndex::BackgroundIndex( | 
|  | const ThreadsafeFS &TFS, const GlobalCompilationDatabase &CDB, | 
|  | BackgroundIndexStorage::Factory IndexStorageFactory, Options Opts) | 
|  | : SwapIndex(std::make_unique<MemIndex>()), TFS(TFS), CDB(CDB), | 
|  | ContextProvider(std::move(Opts.ContextProvider)), | 
|  | IndexedSymbols(IndexContents::All), | 
|  | Rebuilder(this, &IndexedSymbols, Opts.ThreadPoolSize), | 
|  | IndexStorageFactory(std::move(IndexStorageFactory)), | 
|  | Queue(std::move(Opts.OnProgress)), | 
|  | CommandsChanged( | 
|  | CDB.watch([&](const std::vector<std::string> &ChangedFiles) { | 
|  | enqueue(ChangedFiles); | 
|  | })) { | 
|  | assert(Opts.ThreadPoolSize > 0 && "Thread pool size can't be zero."); | 
|  | assert(this->IndexStorageFactory && "Storage factory can not be null!"); | 
|  | for (unsigned I = 0; I < Opts.ThreadPoolSize; ++I) { | 
|  | ThreadPool.runAsync("background-worker-" + llvm::Twine(I + 1), | 
|  | [this, Ctx(Context::current().clone())]() mutable { | 
|  | WithContext BGContext(std::move(Ctx)); | 
|  | Queue.work([&] { Rebuilder.idle(); }); | 
|  | }); | 
|  | } | 
|  | } | 
|  |  | 
|  | BackgroundIndex::~BackgroundIndex() { | 
|  | stop(); | 
|  | ThreadPool.wait(); | 
|  | } | 
|  |  | 
|  | BackgroundQueue::Task BackgroundIndex::changedFilesTask( | 
|  | const std::vector<std::string> &ChangedFiles) { | 
|  | BackgroundQueue::Task T([this, ChangedFiles] { | 
|  | trace::Span Tracer("BackgroundIndexEnqueue"); | 
|  |  | 
|  | llvm::Optional<WithContext> WithProvidedContext; | 
|  | if (ContextProvider) | 
|  | WithProvidedContext.emplace(ContextProvider(/*Path=*/"")); | 
|  |  | 
|  | // We're doing this asynchronously, because we'll read shards here too. | 
|  | log("Enqueueing {0} commands for indexing", ChangedFiles.size()); | 
|  | SPAN_ATTACH(Tracer, "files", int64_t(ChangedFiles.size())); | 
|  |  | 
|  | auto NeedsReIndexing = loadProject(std::move(ChangedFiles)); | 
|  | // Run indexing for files that need to be updated. | 
|  | std::shuffle(NeedsReIndexing.begin(), NeedsReIndexing.end(), | 
|  | std::mt19937(std::random_device{}())); | 
|  | std::vector<BackgroundQueue::Task> Tasks; | 
|  | Tasks.reserve(NeedsReIndexing.size()); | 
|  | for (const auto &File : NeedsReIndexing) | 
|  | Tasks.push_back(indexFileTask(std::move(File))); | 
|  | Queue.append(std::move(Tasks)); | 
|  | }); | 
|  |  | 
|  | T.QueuePri = LoadShards; | 
|  | T.ThreadPri = llvm::ThreadPriority::Default; | 
|  | return T; | 
|  | } | 
|  |  | 
|  | static llvm::StringRef filenameWithoutExtension(llvm::StringRef Path) { | 
|  | Path = llvm::sys::path::filename(Path); | 
|  | return Path.drop_back(llvm::sys::path::extension(Path).size()); | 
|  | } | 
|  |  | 
|  | BackgroundQueue::Task BackgroundIndex::indexFileTask(std::string Path) { | 
|  | std::string Tag = filenameWithoutExtension(Path).str(); | 
|  | uint64_t Key = llvm::xxHash64(Path); | 
|  | BackgroundQueue::Task T([this, Path(std::move(Path))] { | 
|  | llvm::Optional<WithContext> WithProvidedContext; | 
|  | if (ContextProvider) | 
|  | WithProvidedContext.emplace(ContextProvider(Path)); | 
|  | auto Cmd = CDB.getCompileCommand(Path); | 
|  | if (!Cmd) | 
|  | return; | 
|  | if (auto Error = index(std::move(*Cmd))) | 
|  | elog("Indexing {0} failed: {1}", Path, std::move(Error)); | 
|  | }); | 
|  | T.QueuePri = IndexFile; | 
|  | T.Tag = std::move(Tag); | 
|  | T.Key = Key; | 
|  | return T; | 
|  | } | 
|  |  | 
|  | void BackgroundIndex::boostRelated(llvm::StringRef Path) { | 
|  | if (isHeaderFile(Path)) | 
|  | Queue.boost(filenameWithoutExtension(Path), IndexBoostedFile); | 
|  | } | 
|  |  | 
|  | /// Given index results from a TU, only update symbols coming from files that | 
|  | /// are different or missing from than \p ShardVersionsSnapshot. Also stores new | 
|  | /// index information on IndexStorage. | 
|  | void BackgroundIndex::update( | 
|  | llvm::StringRef MainFile, IndexFileIn Index, | 
|  | const llvm::StringMap<ShardVersion> &ShardVersionsSnapshot, | 
|  | bool HadErrors) { | 
|  | // Keys are URIs. | 
|  | llvm::StringMap<std::pair<Path, FileDigest>> FilesToUpdate; | 
|  | // Note that sources do not contain any information regarding missing headers, | 
|  | // since we don't even know what absolute path they should fall in. | 
|  | for (const auto &IndexIt : *Index.Sources) { | 
|  | const auto &IGN = IndexIt.getValue(); | 
|  | auto AbsPath = URI::resolve(IGN.URI, MainFile); | 
|  | if (!AbsPath) { | 
|  | elog("Failed to resolve URI: {0}", AbsPath.takeError()); | 
|  | continue; | 
|  | } | 
|  | const auto DigestIt = ShardVersionsSnapshot.find(*AbsPath); | 
|  | // File has different contents, or indexing was successful this time. | 
|  | if (DigestIt == ShardVersionsSnapshot.end() || | 
|  | DigestIt->getValue().Digest != IGN.Digest || | 
|  | (DigestIt->getValue().HadErrors && !HadErrors)) | 
|  | FilesToUpdate[IGN.URI] = {std::move(*AbsPath), IGN.Digest}; | 
|  | } | 
|  |  | 
|  | // Shard slabs into files. | 
|  | FileShardedIndex ShardedIndex(std::move(Index)); | 
|  |  | 
|  | // Build and store new slabs for each updated file. | 
|  | for (const auto &FileIt : FilesToUpdate) { | 
|  | auto Uri = FileIt.first(); | 
|  | auto IF = ShardedIndex.getShard(Uri); | 
|  | assert(IF && "no shard for file in Index.Sources?"); | 
|  | PathRef Path = FileIt.getValue().first; | 
|  |  | 
|  | // Only store command line hash for main files of the TU, since our | 
|  | // current model keeps only one version of a header file. | 
|  | if (Path != MainFile) | 
|  | IF->Cmd.reset(); | 
|  |  | 
|  | // We need to store shards before updating the index, since the latter | 
|  | // consumes slabs. | 
|  | // FIXME: Also skip serializing the shard if it is already up-to-date. | 
|  | if (auto Error = IndexStorageFactory(Path)->storeShard(Path, *IF)) | 
|  | elog("Failed to write background-index shard for file {0}: {1}", Path, | 
|  | std::move(Error)); | 
|  |  | 
|  | { | 
|  | std::lock_guard<std::mutex> Lock(ShardVersionsMu); | 
|  | const auto &Hash = FileIt.getValue().second; | 
|  | auto DigestIt = ShardVersions.try_emplace(Path); | 
|  | ShardVersion &SV = DigestIt.first->second; | 
|  | // Skip if file is already up to date, unless previous index was broken | 
|  | // and this one is not. | 
|  | if (!DigestIt.second && SV.Digest == Hash && SV.HadErrors && !HadErrors) | 
|  | continue; | 
|  | SV.Digest = Hash; | 
|  | SV.HadErrors = HadErrors; | 
|  |  | 
|  | // This can override a newer version that is added in another thread, if | 
|  | // this thread sees the older version but finishes later. This should be | 
|  | // rare in practice. | 
|  | IndexedSymbols.update( | 
|  | Uri, std::make_unique<SymbolSlab>(std::move(*IF->Symbols)), | 
|  | std::make_unique<RefSlab>(std::move(*IF->Refs)), | 
|  | std::make_unique<RelationSlab>(std::move(*IF->Relations)), | 
|  | Path == MainFile); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | llvm::Error BackgroundIndex::index(tooling::CompileCommand Cmd) { | 
|  | trace::Span Tracer("BackgroundIndex"); | 
|  | SPAN_ATTACH(Tracer, "file", Cmd.Filename); | 
|  | auto AbsolutePath = getAbsolutePath(Cmd); | 
|  |  | 
|  | auto FS = TFS.view(Cmd.Directory); | 
|  | auto Buf = FS->getBufferForFile(AbsolutePath); | 
|  | if (!Buf) | 
|  | return llvm::errorCodeToError(Buf.getError()); | 
|  | auto Hash = digest(Buf->get()->getBuffer()); | 
|  |  | 
|  | // Take a snapshot of the versions to avoid locking for each file in the TU. | 
|  | llvm::StringMap<ShardVersion> ShardVersionsSnapshot; | 
|  | { | 
|  | std::lock_guard<std::mutex> Lock(ShardVersionsMu); | 
|  | ShardVersionsSnapshot = ShardVersions; | 
|  | } | 
|  |  | 
|  | vlog("Indexing {0} (digest:={1})", Cmd.Filename, llvm::toHex(Hash)); | 
|  | ParseInputs Inputs; | 
|  | Inputs.TFS = &TFS; | 
|  | Inputs.CompileCommand = std::move(Cmd); | 
|  | IgnoreDiagnostics IgnoreDiags; | 
|  | auto CI = buildCompilerInvocation(Inputs, IgnoreDiags); | 
|  | if (!CI) | 
|  | return error("Couldn't build compiler invocation"); | 
|  |  | 
|  | auto Clang = | 
|  | prepareCompilerInstance(std::move(CI), /*Preamble=*/nullptr, | 
|  | std::move(*Buf), std::move(FS), IgnoreDiags); | 
|  | if (!Clang) | 
|  | return error("Couldn't build compiler instance"); | 
|  |  | 
|  | SymbolCollector::Options IndexOpts; | 
|  | // Creates a filter to not collect index results from files with unchanged | 
|  | // digests. | 
|  | IndexOpts.FileFilter = [&ShardVersionsSnapshot](const SourceManager &SM, | 
|  | FileID FID) { | 
|  | const auto *F = SM.getFileEntryForID(FID); | 
|  | if (!F) | 
|  | return false; // Skip invalid files. | 
|  | auto AbsPath = getCanonicalPath(F, SM); | 
|  | if (!AbsPath) | 
|  | return false; // Skip files without absolute path. | 
|  | auto Digest = digestFile(SM, FID); | 
|  | if (!Digest) | 
|  | return false; | 
|  | auto D = ShardVersionsSnapshot.find(*AbsPath); | 
|  | if (D != ShardVersionsSnapshot.end() && D->second.Digest == Digest && | 
|  | !D->second.HadErrors) | 
|  | return false; // Skip files that haven't changed, without errors. | 
|  | return true; | 
|  | }; | 
|  | IndexOpts.CollectMainFileRefs = true; | 
|  |  | 
|  | IndexFileIn Index; | 
|  | auto Action = createStaticIndexingAction( | 
|  | IndexOpts, [&](SymbolSlab S) { Index.Symbols = std::move(S); }, | 
|  | [&](RefSlab R) { Index.Refs = std::move(R); }, | 
|  | [&](RelationSlab R) { Index.Relations = std::move(R); }, | 
|  | [&](IncludeGraph IG) { Index.Sources = std::move(IG); }); | 
|  |  | 
|  | // We're going to run clang here, and it could potentially crash. | 
|  | // We could use CrashRecoveryContext to try to make indexing crashes nonfatal, | 
|  | // but the leaky "recovery" is pretty scary too in a long-running process. | 
|  | // If crashes are a real problem, maybe we should fork a child process. | 
|  |  | 
|  | const FrontendInputFile &Input = Clang->getFrontendOpts().Inputs.front(); | 
|  | if (!Action->BeginSourceFile(*Clang, Input)) | 
|  | return error("BeginSourceFile() failed"); | 
|  | if (llvm::Error Err = Action->Execute()) | 
|  | return Err; | 
|  |  | 
|  | Action->EndSourceFile(); | 
|  |  | 
|  | Index.Cmd = Inputs.CompileCommand; | 
|  | assert(Index.Symbols && Index.Refs && Index.Sources && | 
|  | "Symbols, Refs and Sources must be set."); | 
|  |  | 
|  | log("Indexed {0} ({1} symbols, {2} refs, {3} files)", | 
|  | Inputs.CompileCommand.Filename, Index.Symbols->size(), | 
|  | Index.Refs->numRefs(), Index.Sources->size()); | 
|  | SPAN_ATTACH(Tracer, "symbols", int(Index.Symbols->size())); | 
|  | SPAN_ATTACH(Tracer, "refs", int(Index.Refs->numRefs())); | 
|  | SPAN_ATTACH(Tracer, "sources", int(Index.Sources->size())); | 
|  |  | 
|  | bool HadErrors = Clang->hasDiagnostics() && | 
|  | Clang->getDiagnostics().hasUncompilableErrorOccurred(); | 
|  | if (HadErrors) { | 
|  | log("Failed to compile {0}, index may be incomplete", AbsolutePath); | 
|  | for (auto &It : *Index.Sources) | 
|  | It.second.Flags |= IncludeGraphNode::SourceFlag::HadErrors; | 
|  | } | 
|  | update(AbsolutePath, std::move(Index), ShardVersionsSnapshot, HadErrors); | 
|  |  | 
|  | Rebuilder.indexedTU(); | 
|  | return llvm::Error::success(); | 
|  | } | 
|  |  | 
|  | // Restores shards for \p MainFiles from index storage. Then checks staleness of | 
|  | // those shards and returns a list of TUs that needs to be indexed to update | 
|  | // staleness. | 
|  | std::vector<std::string> | 
|  | BackgroundIndex::loadProject(std::vector<std::string> MainFiles) { | 
|  | // Drop files where background indexing is disabled in config. | 
|  | if (ContextProvider) | 
|  | llvm::erase_if(MainFiles, [&](const std::string &TU) { | 
|  | // Load the config for each TU, as indexing may be selectively enabled. | 
|  | WithContext WithProvidedContext(ContextProvider(TU)); | 
|  | return Config::current().Index.Background == | 
|  | Config::BackgroundPolicy::Skip; | 
|  | }); | 
|  | Rebuilder.startLoading(); | 
|  | // Load shards for all of the mainfiles. | 
|  | const std::vector<LoadedShard> Result = | 
|  | loadIndexShards(MainFiles, IndexStorageFactory, CDB); | 
|  | size_t LoadedShards = 0; | 
|  | { | 
|  | // Update in-memory state. | 
|  | std::lock_guard<std::mutex> Lock(ShardVersionsMu); | 
|  | for (auto &LS : Result) { | 
|  | if (!LS.Shard) | 
|  | continue; | 
|  | auto SS = | 
|  | LS.Shard->Symbols | 
|  | ? std::make_unique<SymbolSlab>(std::move(*LS.Shard->Symbols)) | 
|  | : nullptr; | 
|  | auto RS = LS.Shard->Refs | 
|  | ? std::make_unique<RefSlab>(std::move(*LS.Shard->Refs)) | 
|  | : nullptr; | 
|  | auto RelS = | 
|  | LS.Shard->Relations | 
|  | ? std::make_unique<RelationSlab>(std::move(*LS.Shard->Relations)) | 
|  | : nullptr; | 
|  | ShardVersion &SV = ShardVersions[LS.AbsolutePath]; | 
|  | SV.Digest = LS.Digest; | 
|  | SV.HadErrors = LS.HadErrors; | 
|  | ++LoadedShards; | 
|  |  | 
|  | IndexedSymbols.update(URI::create(LS.AbsolutePath).toString(), | 
|  | std::move(SS), std::move(RS), std::move(RelS), | 
|  | LS.CountReferences); | 
|  | } | 
|  | } | 
|  | Rebuilder.loadedShard(LoadedShards); | 
|  | Rebuilder.doneLoading(); | 
|  |  | 
|  | auto FS = TFS.view(/*CWD=*/llvm::None); | 
|  | llvm::DenseSet<PathRef> TUsToIndex; | 
|  | // We'll accept data from stale shards, but ensure the files get reindexed | 
|  | // soon. | 
|  | for (auto &LS : Result) { | 
|  | if (!shardIsStale(LS, FS.get())) | 
|  | continue; | 
|  | PathRef TUForFile = LS.DependentTU; | 
|  | assert(!TUForFile.empty() && "File without a TU!"); | 
|  |  | 
|  | // FIXME: Currently, we simply schedule indexing on a TU whenever any of | 
|  | // its dependencies needs re-indexing. We might do it smarter by figuring | 
|  | // out a minimal set of TUs that will cover all the stale dependencies. | 
|  | // FIXME: Try looking at other TUs if no compile commands are available | 
|  | // for this TU, i.e TU was deleted after we performed indexing. | 
|  | TUsToIndex.insert(TUForFile); | 
|  | } | 
|  |  | 
|  | return {TUsToIndex.begin(), TUsToIndex.end()}; | 
|  | } | 
|  |  | 
|  | void BackgroundIndex::profile(MemoryTree &MT) const { | 
|  | IndexedSymbols.profile(MT.child("slabs")); | 
|  | // We don't want to mix memory used by index and symbols, so call base class. | 
|  | MT.child("index").addUsage(SwapIndex::estimateMemoryUsage()); | 
|  | } | 
|  | } // namespace clangd | 
|  | } // namespace clang |