| <!DOCTYPE HTML> |
| <html lang="en" class="light sidebar-visible" dir="ltr"> |
| <head> |
| <!-- Book generated using mdBook --> |
| <meta charset="UTF-8"> |
| <title>Incremental compilation in detail - Rust Compiler Development Guide</title> |
| |
| |
| <!-- Custom HTML head --> |
| |
| <meta name="description" content="A guide to developing the Rust compiler (rustc)"> |
| <meta name="viewport" content="width=device-width, initial-scale=1"> |
| <meta name="theme-color" content="#ffffff"> |
| |
| <link rel="icon" href="../favicon.svg"> |
| <link rel="shortcut icon" href="../favicon.png"> |
| <link rel="stylesheet" href="../css/variables.css"> |
| <link rel="stylesheet" href="../css/general.css"> |
| <link rel="stylesheet" href="../css/chrome.css"> |
| <link rel="stylesheet" href="../css/print.css" media="print"> |
| |
| <!-- Fonts --> |
| <link rel="stylesheet" href="../FontAwesome/css/font-awesome.css"> |
| <link rel="stylesheet" href="../fonts/fonts.css"> |
| |
| <!-- Highlight.js Stylesheets --> |
| <link rel="stylesheet" id="highlight-css" href="../highlight.css"> |
| <link rel="stylesheet" id="tomorrow-night-css" href="../tomorrow-night.css"> |
| <link rel="stylesheet" id="ayu-highlight-css" href="../ayu-highlight.css"> |
| |
| <!-- Custom theme stylesheets --> |
| <link rel="stylesheet" href="../pagetoc.css"> |
| |
| |
| <!-- Provide site root and default themes to javascript --> |
| <script> |
| const path_to_root = "../"; |
| const default_light_theme = "light"; |
| const default_dark_theme = "navy"; |
| window.path_to_searchindex_js = "../searchindex.js"; |
| </script> |
| <!-- Start loading toc.js asap --> |
| <script src="../toc.js"></script> |
| </head> |
| <body> |
| <div id="mdbook-help-container"> |
| <div id="mdbook-help-popup"> |
| <h2 class="mdbook-help-title">Keyboard shortcuts</h2> |
| <div> |
| <p>Press <kbd>←</kbd> or <kbd>→</kbd> to navigate between chapters</p> |
| <p>Press <kbd>S</kbd> or <kbd>/</kbd> to search in the book</p> |
| <p>Press <kbd>?</kbd> to show this help</p> |
| <p>Press <kbd>Esc</kbd> to hide this help</p> |
| </div> |
| </div> |
| </div> |
| <div id="body-container"> |
| <!-- Work around some values being stored in localStorage wrapped in quotes --> |
| <script> |
| try { |
| let theme = localStorage.getItem('mdbook-theme'); |
| let sidebar = localStorage.getItem('mdbook-sidebar'); |
| |
| if (theme.startsWith('"') && theme.endsWith('"')) { |
| localStorage.setItem('mdbook-theme', theme.slice(1, theme.length - 1)); |
| } |
| |
| if (sidebar.startsWith('"') && sidebar.endsWith('"')) { |
| localStorage.setItem('mdbook-sidebar', sidebar.slice(1, sidebar.length - 1)); |
| } |
| } catch (e) { } |
| </script> |
| |
| <!-- Set the theme before any content is loaded, prevents flash --> |
| <script> |
| const default_theme = window.matchMedia("(prefers-color-scheme: dark)").matches ? default_dark_theme : default_light_theme; |
| let theme; |
| try { theme = localStorage.getItem('mdbook-theme'); } catch(e) { } |
| if (theme === null || theme === undefined) { theme = default_theme; } |
| const html = document.documentElement; |
| html.classList.remove('light') |
| html.classList.add(theme); |
| html.classList.add("js"); |
| </script> |
| |
| <input type="checkbox" id="sidebar-toggle-anchor" class="hidden"> |
| |
| <!-- Hide / unhide sidebar before it is displayed --> |
| <script> |
| let sidebar = null; |
| const sidebar_toggle = document.getElementById("sidebar-toggle-anchor"); |
| if (document.body.clientWidth >= 1080) { |
| try { sidebar = localStorage.getItem('mdbook-sidebar'); } catch(e) { } |
| sidebar = sidebar || 'visible'; |
| } else { |
| sidebar = 'hidden'; |
| sidebar_toggle.checked = false; |
| } |
| if (sidebar === 'visible') { |
| sidebar_toggle.checked = true; |
| } else { |
| html.classList.remove('sidebar-visible'); |
| } |
| </script> |
| |
| <nav id="sidebar" class="sidebar" aria-label="Table of contents"> |
| <!-- populated by js --> |
| <mdbook-sidebar-scrollbox class="sidebar-scrollbox"></mdbook-sidebar-scrollbox> |
| <noscript> |
| <iframe class="sidebar-iframe-outer" src="../toc.html"></iframe> |
| </noscript> |
| <div id="sidebar-resize-handle" class="sidebar-resize-handle"> |
| <div class="sidebar-resize-indicator"></div> |
| </div> |
| </nav> |
| |
| <div id="page-wrapper" class="page-wrapper"> |
| |
| <div class="page"> |
| <div id="menu-bar-hover-placeholder"></div> |
| <div id="menu-bar" class="menu-bar sticky"> |
| <div class="left-buttons"> |
| <label id="sidebar-toggle" class="icon-button" for="sidebar-toggle-anchor" title="Toggle Table of Contents" aria-label="Toggle Table of Contents" aria-controls="sidebar"> |
| <i class="fa fa-bars"></i> |
| </label> |
| <button id="theme-toggle" class="icon-button" type="button" title="Change theme" aria-label="Change theme" aria-haspopup="true" aria-expanded="false" aria-controls="theme-list"> |
| <i class="fa fa-paint-brush"></i> |
| </button> |
| <ul id="theme-list" class="theme-popup" aria-label="Themes" role="menu"> |
| <li role="none"><button role="menuitem" class="theme" id="default_theme">Auto</button></li> |
| <li role="none"><button role="menuitem" class="theme" id="light">Light</button></li> |
| <li role="none"><button role="menuitem" class="theme" id="rust">Rust</button></li> |
| <li role="none"><button role="menuitem" class="theme" id="coal">Coal</button></li> |
| <li role="none"><button role="menuitem" class="theme" id="navy">Navy</button></li> |
| <li role="none"><button role="menuitem" class="theme" id="ayu">Ayu</button></li> |
| </ul> |
| <button id="search-toggle" class="icon-button" type="button" title="Search (`/`)" aria-label="Toggle Searchbar" aria-expanded="false" aria-keyshortcuts="/ s" aria-controls="searchbar"> |
| <i class="fa fa-search"></i> |
| </button> |
| </div> |
| |
| <h1 class="menu-title">Rust Compiler Development Guide</h1> |
| |
| <div class="right-buttons"> |
| <a href="../print.html" title="Print this book" aria-label="Print this book"> |
| <i id="print-button" class="fa fa-print"></i> |
| </a> |
| <a href="https://github.com/rust-lang/rustc-dev-guide" title="Git repository" aria-label="Git repository"> |
| <i id="git-repository-button" class="fa fa-github"></i> |
| </a> |
| <a href="https://github.com/rust-lang/rustc-dev-guide/edit/main/src/queries/incremental-compilation-in-detail.md" title="Suggest an edit" aria-label="Suggest an edit" rel="edit"> |
| <i id="git-edit-button" class="fa fa-edit"></i> |
| </a> |
| |
| </div> |
| </div> |
| |
| <div id="search-wrapper" class="hidden"> |
| <form id="searchbar-outer" class="searchbar-outer"> |
| <div class="search-wrapper"> |
| <input type="search" id="searchbar" name="searchbar" placeholder="Search this book ..." aria-controls="searchresults-outer" aria-describedby="searchresults-header"> |
| <div class="spinner-wrapper"> |
| <i class="fa fa-spinner fa-spin"></i> |
| </div> |
| </div> |
| </form> |
| <div id="searchresults-outer" class="searchresults-outer hidden"> |
| <div id="searchresults-header" class="searchresults-header"></div> |
| <ul id="searchresults"> |
| </ul> |
| </div> |
| </div> |
| |
| <!-- Apply ARIA attributes after the sidebar and the sidebar toggle button are added to the DOM --> |
| <script> |
| document.getElementById('sidebar-toggle').setAttribute('aria-expanded', sidebar === 'visible'); |
| document.getElementById('sidebar').setAttribute('aria-hidden', sidebar !== 'visible'); |
| Array.from(document.querySelectorAll('#sidebar a')).forEach(function(link) { |
| link.setAttribute('tabIndex', sidebar === 'visible' ? 0 : -1); |
| }); |
| </script> |
| |
| <div id="content" class="content"> |
| <main> |
| <h1 id="incremental-compilation-in-detail"><a class="header" href="#incremental-compilation-in-detail">Incremental compilation in detail</a></h1> |
| <p>The incremental compilation scheme is, in essence, a surprisingly |
| simple extension to the overall query system. It relies on the fact that:</p> |
| <ol> |
| <li>queries are pure functions -- given the same inputs, a query will always |
| yield the same result, and</li> |
| <li>the query model structures compilation in an acyclic graph that makes |
| dependencies between individual computations explicit.</li> |
| </ol> |
| <p>This chapter will explain how we can use these properties for making things |
| incremental and then goes on to discuss version implementation issues.</p> |
| <h2 id="a-basic-algorithm-for-incremental-query-evaluation"><a class="header" href="#a-basic-algorithm-for-incremental-query-evaluation">A Basic Algorithm For Incremental Query Evaluation</a></h2> |
| <p>As explained in the <a href="./query-evaluation-model-in-detail.html">query evaluation model primer</a>, query |
| invocations form a directed-acyclic graph. Here's the example from the |
| previous chapter again:</p> |
| <pre><code class="language-ignore"> list_of_all_hir_items <----------------------------- type_check_crate() |
| | |
| | |
| Hir(foo) <--- type_of(foo) <--- type_check_item(foo) <-------+ |
| | | |
| +-----------------+ | |
| | | |
| v | |
| Hir(bar) <--- type_of(bar) <--- type_check_item(bar) <-------+ |
| </code></pre> |
| <p>Since every access from one query to another has to go through the query |
| context, we can record these accesses and thus actually build this dependency |
| graph in memory. With dependency tracking enabled, when compilation is done, |
| we know which queries were invoked (the nodes of the graph) and for each |
| invocation, which other queries or input has gone into computing the query's |
| result (the edges of the graph).</p> |
| <p>Now suppose we change the source code of our program so that |
| HIR of <code>bar</code> looks different than before. Our goal is to only recompute |
| those queries that are actually affected by the change while re-using |
| the cached results of all the other queries. Given the dependency graph we can |
| do exactly that. For a given query invocation, the graph tells us exactly |
| what data has gone into computing its results, we just have to follow the |
| edges until we reach something that has changed. If we don't encounter |
| anything that has changed, we know that the query still would evaluate to |
| the same result we already have in our cache.</p> |
| <p>Taking the <code>type_of(foo)</code> invocation from above as an example, we can check |
| whether the cached result is still valid by following the edges to its |
| inputs. The only edge leads to <code>Hir(foo)</code>, an input that has not been affected |
| by the change. So we know that the cached result for <code>type_of(foo)</code> is still |
| valid.</p> |
| <p>The story is a bit different for <code>type_check_item(foo)</code>: We again walk the |
| edges and already know that <code>type_of(foo)</code> is fine. Then we get to |
| <code>type_of(bar)</code> which we have not checked yet, so we walk the edges of |
| <code>type_of(bar)</code> and encounter <code>Hir(bar)</code> which <em>has</em> changed. Consequently |
| the result of <code>type_of(bar)</code> might yield a different result than what we |
| have in the cache and, transitively, the result of <code>type_check_item(foo)</code> |
| might have changed too. We thus re-run <code>type_check_item(foo)</code>, which in |
| turn will re-run <code>type_of(bar)</code>, which will yield an up-to-date result |
| because it reads the up-to-date version of <code>Hir(bar)</code>. Also, we re-run |
| <code>type_check_item(bar)</code> because result of <code>type_of(bar)</code> might have changed.</p> |
| <h2 id="the-problem-with-the-basic-algorithm-false-positives"><a class="header" href="#the-problem-with-the-basic-algorithm-false-positives">The problem with the basic algorithm: false positives</a></h2> |
| <p>If you read the previous paragraph carefully you'll notice that it says that |
| <code>type_of(bar)</code> <em>might</em> have changed because one of its inputs has changed. |
| There's also the possibility that it might still yield exactly the same |
| result <em>even though</em> its input has changed. Consider an example with a |
| simple query that just computes the sign of an integer:</p> |
| <pre><code class="language-ignore"> IntValue(x) <---- sign_of(x) <--- some_other_query(x) |
| </code></pre> |
| <p>Let's say that <code>IntValue(x)</code> starts out as <code>1000</code> and then is set to <code>2000</code>. |
| Even though <code>IntValue(x)</code> is different in the two cases, <code>sign_of(x)</code> yields |
| the result <code>+</code> in both cases.</p> |
| <p>If we follow the basic algorithm, however, <code>some_other_query(x)</code> would have to |
| (unnecessarily) be re-evaluated because it transitively depends on a changed |
| input. Change detection yields a "false positive" in this case because it has |
| to conservatively assume that <code>some_other_query(x)</code> might be affected by that |
| changed input.</p> |
| <p>Unfortunately it turns out that the actual queries in the compiler are full |
| of examples like this and small changes to the input often potentially affect |
| very large parts of the output binaries. As a consequence, we had to make the |
| change detection system smarter and more accurate.</p> |
| <h2 id="improving-accuracy-the-red-green-algorithm"><a class="header" href="#improving-accuracy-the-red-green-algorithm">Improving accuracy: the red-green algorithm</a></h2> |
| <p>The "false positives" problem can be solved by interleaving change detection |
| and query re-evaluation. Instead of walking the graph all the way to the |
| inputs when trying to find out if some cached result is still valid, we can |
| check if a result has <em>actually</em> changed after we were forced to re-evaluate |
| it.</p> |
| <p>We call this algorithm the red-green algorithm because nodes |
| in the dependency graph are assigned the color green if we were able to prove |
| that its cached result is still valid and the color red if the result has |
| turned out to be different after re-evaluating it.</p> |
| <p>The meat of red-green change tracking is implemented in the try-mark-green |
| algorithm, that, you've guessed it, tries to mark a given node as green:</p> |
| <pre><code class="language-rust ignore">fn try_mark_green(tcx, current_node) -> bool { |
| |
| // Fetch the inputs to `current_node`, i.e. get the nodes that the direct |
| // edges from `node` lead to. |
| let dependencies = tcx.dep_graph.get_dependencies_of(current_node); |
| |
| // Now check all the inputs for changes |
| for dependency in dependencies { |
| |
| match tcx.dep_graph.get_node_color(dependency) { |
| Green => { |
| // This input has already been checked before and it has not |
| // changed; so we can go on to check the next one |
| } |
| Red => { |
| // We found an input that has changed. We cannot mark |
| // `current_node` as green without re-running the |
| // corresponding query. |
| return false |
| } |
| Unknown => { |
| // This is the first time we look at this node. Let's try |
| // to mark it green by calling try_mark_green() recursively. |
| if try_mark_green(tcx, dependency) { |
| // We successfully marked the input as green, on to the |
| // next. |
| } else { |
| // We could *not* mark the input as green. This means we |
| // don't know if its value has changed. In order to find |
| // out, we re-run the corresponding query now! |
| tcx.run_query_for(dependency); |
| |
| // Fetch and check the node color again. Running the query |
| // has forced it to either red (if it yielded a different |
| // result than we have in the cache) or green (if it |
| // yielded the same result). |
| match tcx.dep_graph.get_node_color(dependency) { |
| Red => { |
| // The input turned out to be red, so we cannot |
| // mark `current_node` as green. |
| return false |
| } |
| Green => { |
| // Re-running the query paid off! The result is the |
| // same as before, so this particular input does |
| // not invalidate `current_node`. |
| } |
| Unknown => { |
| // There is no way a node has no color after |
| // re-running the query. |
| panic!("unreachable") |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| // If we have gotten through the entire loop, it means that all inputs |
| // have turned out to be green. If all inputs are unchanged, it means |
| // that the query result corresponding to `current_node` cannot have |
| // changed either. |
| tcx.dep_graph.mark_green(current_node); |
| |
| true |
| }</code></pre> |
| <blockquote> |
| <p>NOTE: |
| The actual implementation can be found in |
| <a href="https://doc.rust-lang.org/nightly/nightly-rustc/src/rustc_query_system/dep_graph/graph.rs.html"><code>compiler/rustc_query_system/src/dep_graph/graph.rs</code></a></p> |
| </blockquote> |
| <p>By using red-green marking we can avoid the devastating cumulative effect of |
| having false positives during change detection. Whenever a query is executed |
| in incremental mode, we first check if its already green. If not, we run |
| <code>try_mark_green()</code> on it. If it still isn't green after that, then we actually |
| invoke the query provider to re-compute the result. Re-computing the query might |
| then itself involve recursively invoking more queries, which can mean we come back |
| to the <code>try_mark_green()</code> algorithm for the dependencies recursively.</p> |
| <h2 id="the-real-world-how-persistence-makes-everything-complicated"><a class="header" href="#the-real-world-how-persistence-makes-everything-complicated">The real world: how persistence makes everything complicated</a></h2> |
| <p>The sections above described the underlying algorithm for incremental |
| compilation but because the compiler process exits after being finished and |
| takes the query context with its result cache with it into oblivion, we have to |
| persist data to disk, so the next compilation session can make use of it. |
| This comes with a whole new set of implementation challenges:</p> |
| <ul> |
| <li>The query result cache is stored to disk, so they are not readily available |
| for change comparison.</li> |
| <li>A subsequent compilation session will start off with new version of the code |
| that has arbitrary changes applied to it. All kinds of IDs and indices that |
| are generated from a global, sequential counter (e.g. <code>NodeId</code>, <code>DefId</code>, etc) |
| might have shifted, making the persisted results on disk not immediately |
| usable anymore because the same numeric IDs and indices might refer to |
| completely new things in the new compilation session.</li> |
| <li>Persisting things to disk comes at a cost, so not every tiny piece of |
| information should be actually cached in between compilation sessions. |
| Fixed-sized, plain-old-data is preferred to complex things that need to run |
| through an expensive (de-)serialization step.</li> |
| </ul> |
| <p>The following sections describe how the compiler solves these issues.</p> |
| <h3 id="a-question-of-stability-bridging-the-gap-between-compilation-sessions"><a class="header" href="#a-question-of-stability-bridging-the-gap-between-compilation-sessions">A Question Of Stability: Bridging The Gap Between Compilation Sessions</a></h3> |
| <p>As noted before, various IDs (like <code>DefId</code>) are generated by the compiler in a |
| way that depends on the contents of the source code being compiled. ID assignment |
| is usually deterministic, that is, if the exact same code is compiled twice, |
| the same things will end up with the same IDs. However, if something |
| changes, e.g. a function is added in the middle of a file, there is no |
| guarantee that anything will have the same ID as it had before.</p> |
| <p>As a consequence we cannot represent the data in our on-disk cache the same |
| way it is represented in memory. For example, if we just stored a piece |
| of type information like <code>TyKind::FnDef(DefId, &'tcx Substs<'tcx>)</code> (as we do |
| in memory) and then the contained <code>DefId</code> points to a different function in |
| a new compilation session we'd be in trouble.</p> |
| <p>The solution to this problem is to find "stable" forms for IDs which remain |
| valid in between compilation sessions. For the most important case, <code>DefId</code>s, |
| these are the so-called <code>DefPath</code>s. Each <code>DefId</code> has a |
| corresponding <code>DefPath</code> but in place of a numeric ID, a <code>DefPath</code> is based on |
| the path to the identified item, e.g. <code>std::collections::HashMap</code>. The |
| advantage of an ID like this is that it is not affected by unrelated changes. |
| For example, one can add a new function to <code>std::collections</code> but |
| <code>std::collections::HashMap</code> would still be <code>std::collections::HashMap</code>. A |
| <code>DefPath</code> is "stable" across changes made to the source code while a <code>DefId</code> |
| isn't.</p> |
| <p>There is also the <code>DefPathHash</code> which is just a 128-bit hash value of the |
| <code>DefPath</code>. The two contain the same information and we mostly use the |
| <code>DefPathHash</code> because it simpler to handle, being <code>Copy</code> and self-contained.</p> |
| <p>This principle of stable identifiers is used to make the data in the on-disk |
| cache resilient to source code changes. Instead of storing a <code>DefId</code>, we store |
| the <code>DefPathHash</code> and when we deserialize something from the cache, we map the |
| <code>DefPathHash</code> to the corresponding <code>DefId</code> in the <em>current</em> compilation session |
| (which is just a simple hash table lookup).</p> |
| <p>The <code>HirId</code>, used for identifying HIR components that don't have their own |
| <code>DefId</code>, is another such stable ID. It is (conceptually) a pair of a <code>DefPath</code> |
| and a <code>LocalId</code>, where the <code>LocalId</code> identifies something (e.g. a <code>hir::Expr</code>) |
| locally within its "owner" (e.g. a <code>hir::Item</code>). If the owner is moved around, |
| the <code>LocalId</code>s within it are still the same.</p> |
| <h3 id="checking-query-results-for-changes-hashstable-and-fingerprints"><a class="header" href="#checking-query-results-for-changes-hashstable-and-fingerprints">Checking query results for changes: <code>HashStable</code> and <code>Fingerprint</code>s</a></h3> |
| <p>In order to do red-green-marking we often need to check if the result of a |
| query has changed compared to the result it had during the previous |
| compilation session. There are two performance problems with this though:</p> |
| <ul> |
| <li>We'd like to avoid having to load the previous result from disk just for |
| doing the comparison. We already computed the new result and will use that. |
| Also loading a result from disk will "pollute" the interners with data that |
| is unlikely to ever be used.</li> |
| <li>We don't want to store each and every result in the on-disk cache. For |
| example, it would be wasted effort to persist things to disk that are |
| already available in upstream crates.</li> |
| </ul> |
| <p>The compiler avoids these problems by using so-called <code>Fingerprint</code>s. Each time |
| a new query result is computed, the query engine will compute a 128 bit hash |
| value of the result. We call this hash value "the <code>Fingerprint</code> of the query |
| result". The hashing is (and has to be) done "in a stable way". This means |
| that whenever something is hashed that might change in between compilation |
| sessions (e.g. a <code>DefId</code>), we instead hash its stable equivalent |
| (e.g. the corresponding <code>DefPath</code>). That's what the whole <code>HashStable</code> |
| infrastructure is for. This way <code>Fingerprint</code>s computed in two |
| different compilation sessions are still comparable.</p> |
| <p>The next step is to store these fingerprints along with the dependency graph. |
| This is cheap since fingerprints are just bytes to be copied. It's also cheap to |
| load the entire set of fingerprints together with the dependency graph.</p> |
| <p>Now, when red-green-marking reaches the point where it needs to check if a |
| result has changed, it can just compare the (already loaded) previous |
| fingerprint to the fingerprint of the new result.</p> |
| <p>This approach works rather well but it's not without flaws:</p> |
| <ul> |
| <li> |
| <p>There is a small possibility of hash collisions. That is, two different |
| results could have the same fingerprint and the system would erroneously |
| assume that the result hasn't changed, leading to a missed update.</p> |
| <p>We mitigate this risk by using a high-quality hash function and a 128 bit |
| wide hash value. Due to these measures the practical risk of a hash |
| collision is negligible.</p> |
| </li> |
| <li> |
| <p>Computing fingerprints is quite costly. It is the main reason why incremental |
| compilation can be slower than non-incremental compilation. We are forced to |
| use a good and thus expensive hash function, and we have to map things to |
| their stable equivalents while doing the hashing.</p> |
| </li> |
| </ul> |
| <h3 id="a-tale-of-two-depgraphs-the-old-and-the-new"><a class="header" href="#a-tale-of-two-depgraphs-the-old-and-the-new">A tale of two <code>DepGraph</code>s: the old and the new</a></h3> |
| <p>The initial description of dependency tracking glosses over a few details |
| that quickly become a head scratcher when actually trying to implement things. |
| In particular it's easy to overlook that we are actually dealing with <em>two</em> |
| dependency graphs: The one we built during the previous compilation session and |
| the one that we are building for the current compilation session.</p> |
| <p>When a compilation session starts, the compiler loads the previous dependency |
| graph into memory as an immutable piece of data. Then, when a query is invoked, |
| it will first try to mark the corresponding node in the graph as green. This |
| means really that we are trying to mark the node in the <em>previous</em> dep-graph |
| as green that corresponds to the query key in the <em>current</em> session. How do we |
| do this mapping between current query key and previous <code>DepNode</code>? The answer |
| is again <code>Fingerprint</code>s: Nodes in the dependency graph are identified by a |
| fingerprint of the query key. Since fingerprints are stable across compilation |
| sessions, computing one in the current session allows us to find a node |
| in the dependency graph from the previous session. If we don't find a node with |
| the given fingerprint, it means that the query key refers to something that |
| did not yet exist in the previous session.</p> |
| <p>So, having found the dep-node in the previous dependency graph, we can look |
| up its dependencies (i.e. also dep-nodes in the previous graph) and continue with |
| the rest of the try-mark-green algorithm. The next interesting thing happens |
| when we successfully marked the node as green. At that point we copy the node |
| and the edges to its dependencies from the old graph into the new graph. We |
| have to do this because the new dep-graph cannot acquire the |
| node and edges via the regular dependency tracking. The tracking system can |
| only record edges while actually running a query -- but running the query, |
| although we have the result already cached, is exactly what we want to avoid.</p> |
| <p>Once the compilation session has finished, all the unchanged parts have been |
| copied over from the old into the new dependency graph, while the changed parts |
| have been added to the new graph by the tracking system. At this point, the |
| new graph is serialized out to disk, alongside the query result cache, and can |
| act as the previous dep-graph in a subsequent compilation session.</p> |
| <h3 id="didnt-you-forget-something-cache-promotion"><a class="header" href="#didnt-you-forget-something-cache-promotion">Didn't you forget something?: cache promotion</a></h3> |
| <p>The system described so far has a somewhat subtle property: If all inputs of a |
| dep-node are green then the dep-node itself can be marked as green without |
| computing or loading the corresponding query result. Applying this property |
| transitively often leads to the situation that some intermediate results are |
| never actually loaded from disk, as in the following example:</p> |
| <pre><code class="language-ignore"> input(A) <-- intermediate_query(B) <-- leaf_query(C) |
| </code></pre> |
| <p>The compiler might need the value of <code>leaf_query(C)</code> in order to generate some |
| output artifact. If it can mark <code>leaf_query(C)</code> as green, it will load the |
| result from the on-disk cache. The result of <code>intermediate_query(B)</code> is never |
| loaded though. As a consequence, when the compiler persists the <em>new</em> result |
| cache by writing all in-memory query results to disk, <code>intermediate_query(B)</code> |
| will not be in memory and thus will be missing from the new result cache.</p> |
| <p>If there subsequently is another compilation session that actually needs the |
| result of <code>intermediate_query(B)</code> it will have to be re-computed even though we |
| had a perfectly valid result for it in the cache just before.</p> |
| <p>In order to prevent this from happening, the compiler does something called |
| "cache promotion": Before emitting the new result cache it will walk all green |
| dep-nodes and make sure that their query result is loaded into memory. That way |
| the result cache doesn't unnecessarily shrink again.</p> |
| <h1 id="incremental-compilation-and-the-compiler-backend"><a class="header" href="#incremental-compilation-and-the-compiler-backend">Incremental compilation and the compiler backend</a></h1> |
| <p>The compiler backend, the part involving LLVM, is using the query system but |
| it is not implemented in terms of queries itself. As a consequence it does not |
| automatically partake in dependency tracking. However, the manual integration |
| with the tracking system is pretty straight-forward. The compiler simply tracks |
| what queries get invoked when generating the initial LLVM version of each |
| codegen unit (CGU), which results in a dep-node for each CGU. In subsequent |
| compilation sessions it then tries to mark the dep-node for a CGU as green. If |
| it succeeds, it knows that the corresponding object and bitcode files on disk |
| are still valid. If it doesn't succeed, the entire CGU has to be recompiled.</p> |
| <p>This is the same approach that is used for regular queries. The main differences |
| are:</p> |
| <ul> |
| <li> |
| <p>that we cannot easily compute a fingerprint for LLVM modules (because |
| they are opaque C++ objects),</p> |
| </li> |
| <li> |
| <p>that the logic for dealing with cached values is rather different from |
| regular queries because here we have bitcode and object files instead of |
| serialized Rust values in the common result cache file, and</p> |
| </li> |
| <li> |
| <p>the operations around LLVM are so expensive in terms of computation time and |
| memory consumption that we need to have tight control over what is |
| executed when and what stays in memory for how long.</p> |
| </li> |
| </ul> |
| <p>The query system could probably be extended with general purpose mechanisms to |
| deal with all of the above but so far that seemed like more trouble than it |
| would save.</p> |
| <h2 id="query-modifiers"><a class="header" href="#query-modifiers">Query modifiers</a></h2> |
| <p>The query system allows for applying <a href="../query.html#adding-a-new-kind-of-query">modifiers</a> to queries. These |
| modifiers affect certain aspects of how the system treats the query with |
| respect to incremental compilation:</p> |
| <ul> |
| <li> |
| <p><code>eval_always</code> - A query with the <code>eval_always</code> attribute is re-executed |
| unconditionally during incremental compilation. I.e. the system will not |
| even try to mark the query's dep-node as green. This attribute has two use |
| cases:</p> |
| <ul> |
| <li> |
| <p><code>eval_always</code> queries can read inputs (from files, global state, etc). |
| They can also produce side effects like writing to files and changing global state.</p> |
| </li> |
| <li> |
| <p>Some queries are very likely to be re-evaluated because their result |
| depends on the entire source code. In this case <code>eval_always</code> can be used |
| as an optimization because the system can skip recording dependencies in |
| the first place.</p> |
| </li> |
| </ul> |
| </li> |
| <li> |
| <p><code>no_hash</code> - Applying <code>no_hash</code> to a query tells the system to not compute |
| the fingerprint of the query's result. This has two consequences:</p> |
| <ul> |
| <li> |
| <p>Not computing the fingerprint can save quite a bit of time because |
| fingerprinting is expensive, especially for large, complex values.</p> |
| </li> |
| <li> |
| <p>Without the fingerprint, the system has to unconditionally assume that |
| the result of the query has changed. As a consequence anything depending |
| on a <code>no_hash</code> query will always be re-executed.</p> |
| </li> |
| </ul> |
| <p>Using <code>no_hash</code> for a query can make sense in two circumstances:</p> |
| <ul> |
| <li> |
| <p>If the result of the query is very likely to change whenever one of its |
| inputs changes, e.g. a function like <code>|a, b, c| -> (a * b * c)</code>. In such |
| a case recomputing the query will always yield a red node if one of the |
| inputs is red so we can spare us the trouble and default to red immediately. |
| A counter example would be a function like <code>|a| -> (a == 42)</code> where the |
| result does not change for most changes of <code>a</code>.</p> |
| </li> |
| <li> |
| <p>If the result of a query is a big, monolithic collection (e.g. <code>index_hir</code>) |
| and there are "projection queries" reading from that collection |
| (e.g. <code>hir_owner</code>). In such a case the big collection will likely fulfill the |
| condition above (any changed input means recomputing the whole collection) |
| and the results of the projection queries will be hashed anyway. If we also |
| hashed the collection query it would mean that we effectively hash the same |
| data twice: once when hashing the collection and another time when hashing all |
| the projection query results. <code>no_hash</code> allows us to avoid that redundancy |
| and the projection queries act as a "firewall", shielding their dependents |
| from the unconditionally red <code>no_hash</code> node.</p> |
| </li> |
| </ul> |
| </li> |
| <li> |
| <p><code>cache_on_disk_if</code> - This attribute is what determines which query results |
| are persisted in the incremental compilation query result cache. The |
| attribute takes an expression that allows per query invocation |
| decisions. For example, it makes no sense to store values from upstream |
| crates in the cache because they are already available in the upstream |
| crate's metadata.</p> |
| </li> |
| <li> |
| <p><code>anon</code> - This attribute makes the system use "anonymous" dep-nodes for the |
| given query. An anonymous dep-node is not identified by the corresponding |
| query key, instead its ID is computed from the IDs of its dependencies. This |
| allows the red-green system to do its change detection even if there is no |
| query key available for a given dep-node -- something which is needed for |
| handling trait selection because it is not based on queries.</p> |
| </li> |
| </ul> |
| <h2 id="the-projection-query-pattern"><a class="header" href="#the-projection-query-pattern">The projection query pattern</a></h2> |
| <p>It's interesting to note that <code>eval_always</code> and <code>no_hash</code> can be used together |
| in the so-called "projection query" pattern. It is often the case that there is |
| one query that depends on the entirety of the compiler's input (e.g. the indexed HIR) |
| and another query that projects individual values out of this monolithic value |
| (e.g. a HIR item with a certain <code>DefId</code>). These projection queries allow for |
| building change propagation "firewalls" because even if the result of the |
| monolithic query changes (which it is very likely to do) the small projections |
| can still mostly be marked as green.</p> |
| <pre><code class="language-ignore"> +------------+ |
| | | +---------------+ +--------+ |
| | | <---------| projection(x) | <---------| foo(a) | |
| | | +---------------+ +--------+ |
| | | |
| | monolithic | +---------------+ +--------+ |
| | query | <---------| projection(y) | <---------| bar(b) | |
| | | +---------------+ +--------+ |
| | | |
| | | +---------------+ +--------+ |
| | | <---------| projection(z) | <---------| baz(c) | |
| | | +---------------+ +--------+ |
| +------------+ |
| </code></pre> |
| <p>Let's assume that the result <code>monolithic_query</code> changes so that also the result |
| of <code>projection(x)</code> has changed, i.e. both their dep-nodes are being marked as |
| red. As a consequence <code>foo(a)</code> needs to be re-executed; but <code>bar(b)</code> and |
| <code>baz(c)</code> can be marked as green. However, if <code>foo</code>, <code>bar</code>, and <code>baz</code> would have |
| directly depended on <code>monolithic_query</code> then all of them would have had to be |
| re-evaluated.</p> |
| <p>This pattern works even without <code>eval_always</code> and <code>no_hash</code> but the two |
| modifiers can be used to avoid unnecessary overhead. If the monolithic query |
| is likely to change at any minor modification of the compiler's input it makes |
| sense to mark it as <code>eval_always</code>, thus getting rid of its dependency tracking |
| cost. And it always makes sense to mark the monolithic query as <code>no_hash</code> |
| because we have the projections to take care of keeping things green as much |
| as possible.</p> |
| <h1 id="shortcomings-of-the-current-system"><a class="header" href="#shortcomings-of-the-current-system">Shortcomings of the current system</a></h1> |
| <p>There are many things that still can be improved.</p> |
| <h2 id="incrementality-of-on-disk-data-structures"><a class="header" href="#incrementality-of-on-disk-data-structures">Incrementality of on-disk data structures</a></h2> |
| <p>The current system is not able to update on-disk caches and the dependency graph |
| in-place. Instead it has to rewrite each file entirely in each compilation |
| session. The overhead of doing so is a few percent of total compilation time.</p> |
| <h2 id="unnecessary-data-dependencies"><a class="header" href="#unnecessary-data-dependencies">Unnecessary data dependencies</a></h2> |
| <p>Data structures used as query results could be factored in a way that removes |
| edges from the dependency graph. Especially "span" information is very volatile, |
| so including it in query result will increase the chance that the result won't |
| be reusable. See <a href="https://github.com/rust-lang/rust/issues/47389">https://github.com/rust-lang/rust/issues/47389</a> for more |
| information.</p> |
| |
| </main> |
| |
| <nav class="nav-wrapper" aria-label="Page navigation"> |
| <!-- Mobile navigation buttons --> |
| <a rel="prev" href="../queries/incremental-compilation.html" class="mobile-nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left"> |
| <i class="fa fa-angle-left"></i> |
| </a> |
| |
| <a rel="next prefetch" href="../incrcomp-debugging.html" class="mobile-nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right"> |
| <i class="fa fa-angle-right"></i> |
| </a> |
| |
| <div style="clear: both"></div> |
| </nav> |
| </div> |
| </div> |
| |
| <nav class="nav-wide-wrapper" aria-label="Page navigation"> |
| <a rel="prev" href="../queries/incremental-compilation.html" class="nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left"> |
| <i class="fa fa-angle-left"></i> |
| </a> |
| |
| <a rel="next prefetch" href="../incrcomp-debugging.html" class="nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right"> |
| <i class="fa fa-angle-right"></i> |
| </a> |
| </nav> |
| |
| </div> |
| |
| |
| |
| |
| <script> |
| window.playground_copyable = true; |
| </script> |
| |
| |
| <script src="../elasticlunr.min.js"></script> |
| <script src="../mark.min.js"></script> |
| <script src="../searcher.js"></script> |
| |
| <script src="../clipboard.min.js"></script> |
| <script src="../highlight.js"></script> |
| <script src="../book.js"></script> |
| |
| <!-- Custom JS scripts --> |
| <script src="../mermaid.min.js"></script> |
| <script src="../mermaid-init.js"></script> |
| <script src="../pagetoc.js"></script> |
| |
| |
| |
| </div> |
| </body> |
| </html> |