blob: 8253e20a104ea0e0605eb15aa2b0314617696af3 [file] [log] [blame]
<!DOCTYPE HTML>
<html lang="en" class="light sidebar-visible" dir="ltr">
<head>
<!-- Book generated using mdBook -->
<meta charset="UTF-8">
<title>Appendix A: Background topics - Rust Compiler Development Guide</title>
<!-- Custom HTML head -->
<meta name="description" content="A guide to developing the Rust compiler (rustc)">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="theme-color" content="#ffffff">
<link rel="icon" href="../favicon.svg">
<link rel="shortcut icon" href="../favicon.png">
<link rel="stylesheet" href="../css/variables.css">
<link rel="stylesheet" href="../css/general.css">
<link rel="stylesheet" href="../css/chrome.css">
<link rel="stylesheet" href="../css/print.css" media="print">
<!-- Fonts -->
<link rel="stylesheet" href="../FontAwesome/css/font-awesome.css">
<link rel="stylesheet" href="../fonts/fonts.css">
<!-- Highlight.js Stylesheets -->
<link rel="stylesheet" id="highlight-css" href="../highlight.css">
<link rel="stylesheet" id="tomorrow-night-css" href="../tomorrow-night.css">
<link rel="stylesheet" id="ayu-highlight-css" href="../ayu-highlight.css">
<!-- Custom theme stylesheets -->
<!-- Provide site root and default themes to javascript -->
<script>
const path_to_root = "../";
const default_light_theme = "light";
const default_dark_theme = "navy";
</script>
<!-- Start loading toc.js asap -->
<script src="../toc.js"></script>
</head>
<body>
<div id="body-container">
<!-- Work around some values being stored in localStorage wrapped in quotes -->
<script>
try {
let theme = localStorage.getItem('mdbook-theme');
let sidebar = localStorage.getItem('mdbook-sidebar');
if (theme.startsWith('"') && theme.endsWith('"')) {
localStorage.setItem('mdbook-theme', theme.slice(1, theme.length - 1));
}
if (sidebar.startsWith('"') && sidebar.endsWith('"')) {
localStorage.setItem('mdbook-sidebar', sidebar.slice(1, sidebar.length - 1));
}
} catch (e) { }
</script>
<!-- Set the theme before any content is loaded, prevents flash -->
<script>
const default_theme = window.matchMedia("(prefers-color-scheme: dark)").matches ? default_dark_theme : default_light_theme;
let theme;
try { theme = localStorage.getItem('mdbook-theme'); } catch(e) { }
if (theme === null || theme === undefined) { theme = default_theme; }
const html = document.documentElement;
html.classList.remove('light')
html.classList.add(theme);
html.classList.add("js");
</script>
<input type="checkbox" id="sidebar-toggle-anchor" class="hidden">
<!-- Hide / unhide sidebar before it is displayed -->
<script>
let sidebar = null;
const sidebar_toggle = document.getElementById("sidebar-toggle-anchor");
if (document.body.clientWidth >= 1080) {
try { sidebar = localStorage.getItem('mdbook-sidebar'); } catch(e) { }
sidebar = sidebar || 'visible';
} else {
sidebar = 'hidden';
}
sidebar_toggle.checked = sidebar === 'visible';
html.classList.remove('sidebar-visible');
html.classList.add("sidebar-" + sidebar);
</script>
<nav id="sidebar" class="sidebar" aria-label="Table of contents">
<!-- populated by js -->
<mdbook-sidebar-scrollbox class="sidebar-scrollbox"></mdbook-sidebar-scrollbox>
<noscript>
<iframe class="sidebar-iframe-outer" src="../toc.html"></iframe>
</noscript>
<div id="sidebar-resize-handle" class="sidebar-resize-handle">
<div class="sidebar-resize-indicator"></div>
</div>
</nav>
<div id="page-wrapper" class="page-wrapper">
<div class="page">
<div id="menu-bar-hover-placeholder"></div>
<div id="menu-bar" class="menu-bar sticky">
<div class="left-buttons">
<label id="sidebar-toggle" class="icon-button" for="sidebar-toggle-anchor" title="Toggle Table of Contents" aria-label="Toggle Table of Contents" aria-controls="sidebar">
<i class="fa fa-bars"></i>
</label>
<button id="theme-toggle" class="icon-button" type="button" title="Change theme" aria-label="Change theme" aria-haspopup="true" aria-expanded="false" aria-controls="theme-list">
<i class="fa fa-paint-brush"></i>
</button>
<ul id="theme-list" class="theme-popup" aria-label="Themes" role="menu">
<li role="none"><button role="menuitem" class="theme" id="default_theme">Auto</button></li>
<li role="none"><button role="menuitem" class="theme" id="light">Light</button></li>
<li role="none"><button role="menuitem" class="theme" id="rust">Rust</button></li>
<li role="none"><button role="menuitem" class="theme" id="coal">Coal</button></li>
<li role="none"><button role="menuitem" class="theme" id="navy">Navy</button></li>
<li role="none"><button role="menuitem" class="theme" id="ayu">Ayu</button></li>
</ul>
<button id="search-toggle" class="icon-button" type="button" title="Search. (Shortkey: s)" aria-label="Toggle Searchbar" aria-expanded="false" aria-keyshortcuts="S" aria-controls="searchbar">
<i class="fa fa-search"></i>
</button>
</div>
<h1 class="menu-title">Rust Compiler Development Guide</h1>
<div class="right-buttons">
<a href="../print.html" title="Print this book" aria-label="Print this book">
<i id="print-button" class="fa fa-print"></i>
</a>
<a href="https://github.com/rust-lang/rustc-dev-guide" title="Git repository" aria-label="Git repository">
<i id="git-repository-button" class="fa fa-github"></i>
</a>
<a href="https://github.com/rust-lang/rustc-dev-guide/edit/master/src/appendix/background.md" title="Suggest an edit" aria-label="Suggest an edit">
<i id="git-edit-button" class="fa fa-edit"></i>
</a>
</div>
</div>
<div id="search-wrapper" class="hidden">
<form id="searchbar-outer" class="searchbar-outer">
<input type="search" id="searchbar" name="searchbar" placeholder="Search this book ..." aria-controls="searchresults-outer" aria-describedby="searchresults-header">
</form>
<div id="searchresults-outer" class="searchresults-outer hidden">
<div id="searchresults-header" class="searchresults-header"></div>
<ul id="searchresults">
</ul>
</div>
</div>
<!-- Apply ARIA attributes after the sidebar and the sidebar toggle button are added to the DOM -->
<script>
document.getElementById('sidebar-toggle').setAttribute('aria-expanded', sidebar === 'visible');
document.getElementById('sidebar').setAttribute('aria-hidden', sidebar !== 'visible');
Array.from(document.querySelectorAll('#sidebar a')).forEach(function(link) {
link.setAttribute('tabIndex', sidebar === 'visible' ? 0 : -1);
});
</script>
<div id="content" class="content">
<main>
<h1 id="background-topics"><a class="header" href="#background-topics">Background topics</a></h1>
<p>This section covers a numbers of common compiler terms that arise in
this guide. We try to give the general definition while providing some
Rust-specific context.</p>
<p><a id="cfg"></a></p>
<h2 id="what-is-a-control-flow-graph"><a class="header" href="#what-is-a-control-flow-graph">What is a control-flow graph?</a></h2>
<p>A control-flow graph (CFG) is a common term from compilers. If you've ever
used a flow-chart, then the concept of a control-flow graph will be
pretty familiar to you. It's a representation of your program that
clearly exposes the underlying control flow.</p>
<p>A control-flow graph is structured as a set of <strong>basic blocks</strong>
connected by edges. The key idea of a basic block is that it is a set
of statements that execute "together" – that is, whenever you branch
to a basic block, you start at the first statement and then execute
all the remainder. Only at the end of the block is there the
possibility of branching to more than one place (in MIR, we call that
final statement the <strong>terminator</strong>):</p>
<pre><code class="language-mir">bb0: {
statement0;
statement1;
statement2;
...
terminator;
}
</code></pre>
<p>Many expressions that you are used to in Rust compile down to multiple
basic blocks. For example, consider an if statement:</p>
<pre><code class="language-rust ignore">a = 1;
if some_variable {
b = 1;
} else {
c = 1;
}
d = 1;</code></pre>
<p>This would compile into four basic blocks in MIR. In textual form, it looks like
this:</p>
<pre><code class="language-mir">BB0: {
a = 1;
if some_variable {
goto BB1;
} else {
goto BB2;
}
}
BB1: {
b = 1;
goto BB3;
}
BB2: {
c = 1;
goto BB3;
}
BB3: {
d = 1;
...
}
</code></pre>
<p>In graphical form, it looks like this:</p>
<pre><code> BB0
+--------------------+
| a = 1; |
+--------------------+
/ \
if some_variable else
/ \
BB1 / \ BB2
+-----------+ +-----------+
| b = 1; | | c = 1; |
+-----------+ +-----------+
\ /
\ /
\ BB3 /
+----------+
| d = 1; |
| ... |
+----------+
</code></pre>
<p>When using a control-flow graph, a loop simply appears as a cycle in
the graph, and the <code>break</code> keyword translates into a path out of that
cycle.</p>
<p><a id="dataflow"></a></p>
<h2 id="what-is-a-dataflow-analysis"><a class="header" href="#what-is-a-dataflow-analysis">What is a dataflow analysis?</a></h2>
<p><a href="https://cs.au.dk/~amoeller/spa/"><em>Static Program Analysis</em></a> by Anders Møller
and Michael I. Schwartzbach is an incredible resource!</p>
<p><em>Dataflow analysis</em> is a type of static analysis that is common in many
compilers. It describes a general technique, rather than a particular analysis.</p>
<p>The basic idea is that we can walk over a <a href="#cfg">control-flow graph (CFG)</a> and
keep track of what some value could be. At the end of the walk, we might have
shown that some claim is true or not necessarily true (e.g. "this variable must
be initialized"). <code>rustc</code> tends to do dataflow analyses over the MIR, since MIR
is already a CFG.</p>
<p>For example, suppose we want to check that <code>x</code> is initialized before it is used
in this snippet:</p>
<pre><code class="language-rust ignore">fn foo() {
let mut x;
if some_cond {
x = 1;
}
dbg!(x);
}</code></pre>
<p>A CFG for this code might look like this:</p>
<pre><code class="language-txt"> +------+
| Init | (A)
+------+
| \
| if some_cond
else \ +-------+
| \| x = 1 | (B)
| +-------+
| /
+---------+
| dbg!(x) | (C)
+---------+
</code></pre>
<p>We can do the dataflow analysis as follows: we will start off with a flag <code>init</code>
which indicates if we know <code>x</code> is initialized. As we walk the CFG, we will
update the flag. At the end, we can check its value.</p>
<p>So first, in block (A), the variable <code>x</code> is declared but not initialized, so
<code>init = false</code>. In block (B), we initialize the value, so we know that <code>x</code> is
initialized. So at the end of (B), <code>init = true</code>.</p>
<p>Block (C) is where things get interesting. Notice that there are two incoming
edges, one from (A) and one from (B), corresponding to whether <code>some_cond</code> is true or not.
But we cannot know that! It could be the case the <code>some_cond</code> is always true,
so that <code>x</code> is actually always initialized. It could also be the case that
<code>some_cond</code> depends on something random (e.g. the time), so <code>x</code> may not be
initialized. In general, we cannot know statically (due to <a href="https://en.wikipedia.org/wiki/Rice%27s_theorem">Rice's
Theorem</a>). So what should the value of <code>init</code> be in block (C)?</p>
<p>Generally, in dataflow analyses, if a block has multiple parents (like (C) in
our example), its dataflow value will be some function of all its parents (and
of course, what happens in (C)). Which function we use depends on the analysis
we are doing.</p>
<p>In this case, we want to be able to prove definitively that <code>x</code> must be
initialized before use. This forces us to be conservative and assume that
<code>some_cond</code> might be false sometimes. So our "merging function" is "and". That
is, <code>init = true</code> in (C) if <code>init = true</code> in (A) <em>and</em> in (B) (or if <code>x</code> is
initialized in (C)). But this is not the case; in particular, <code>init = false</code> in
(A), and <code>x</code> is not initialized in (C). Thus, <code>init = false</code> in (C); we can
report an error that "<code>x</code> may not be initialized before use".</p>
<p>There is definitely a lot more that can be said about dataflow analyses. There is an
extensive body of research literature on the topic, including a lot of theory.
We only discussed a forwards analysis, but backwards dataflow analysis is also
useful. For example, rather than starting from block (A) and moving forwards,
we might have started with the usage of <code>x</code> and moved backwards to try to find
its initialization.</p>
<p><a id="quantified"></a></p>
<h2 id="what-is-universally-quantified-what-about-existentially-quantified"><a class="header" href="#what-is-universally-quantified-what-about-existentially-quantified">What is "universally quantified"? What about "existentially quantified"?</a></h2>
<p>In math, a predicate may be <em>universally quantified</em> or <em>existentially
quantified</em>:</p>
<ul>
<li><em>Universal</em> quantification:
<ul>
<li>the predicate holds if it is true for all possible inputs.</li>
<li>Traditional notation: ∀x: P(x). Read as "for all x, P(x) holds".</li>
</ul>
</li>
<li><em>Existential</em> quantification:
<ul>
<li>the predicate holds if there is any input where it is true, i.e., there
only has to be a single input.</li>
<li>Traditional notation: ∃x: P(x). Read as "there exists x such that P(x) holds".</li>
</ul>
</li>
</ul>
<p>In Rust, they come up in type checking and trait solving. For example,</p>
<pre><code class="language-rust ignore">fn foo&lt;T&gt;()</code></pre>
<p>This function claims that the function is well-typed for all types <code>T</code>: <code>∀ T: well_typed(foo)</code>.</p>
<p>Another example:</p>
<pre><code class="language-rust ignore">fn foo&lt;'a&gt;(_: &amp;'a usize)</code></pre>
<p>This function claims that for any lifetime <code>'a</code> (determined by the
caller), it is well-typed: <code>∀ 'a: well_typed(foo)</code>.</p>
<p>Another example:</p>
<pre><code class="language-rust ignore">fn foo&lt;F&gt;()
where for&lt;'a&gt; F: Fn(&amp;'a u8)</code></pre>
<p>This function claims that it is well-typed for all types <code>F</code> such that for all
lifetimes <code>'a</code>, <code>F: Fn(&amp;'a u8)</code>: <code>∀ F: ∀ 'a: (F: Fn(&amp;'a u8)) =&gt; well_typed(foo)</code>.</p>
<p>One more example:</p>
<pre><code class="language-rust ignore">fn foo(_: dyn Debug)</code></pre>
<p>This function claims that there exists some type <code>T</code> that implements <code>Debug</code>
such that the function is well-typed: <code>∃ T: (T: Debug) and well_typed(foo)</code>.</p>
<p><a id="variance"></a></p>
<h2 id="what-is-a-de-bruijn-index"><a class="header" href="#what-is-a-de-bruijn-index">What is a de Bruijn Index?</a></h2>
<p><a href="https://en.wikipedia.org/wiki/De_Bruijn_index">De Bruijn indices</a> are a way of representing, using only integers,
which variables are bound in which binders. They were originally invented for
use in lambda calculus evaluation (see <a href="https://en.wikipedia.org/wiki/De_Bruijn_index">this Wikipedia article</a> for
more). In <code>rustc</code>, we use de Bruijn indices to <a href="../ty_module/generic_arguments.html">represent generic types</a>.</p>
<p>Here is a basic example of how de Bruijn indices might be used for closures (we
don't actually do this in <code>rustc</code> though!):</p>
<pre><code class="language-rust ignore">|x| {
f(x) // de Bruijn index of `x` is 1 because `x` is bound 1 level up
|y| {
g(x, y) // index of `x` is 2 because it is bound 2 levels up
// index of `y` is 1 because it is bound 1 level up
}
}</code></pre>
<h2 id="what-are-co--and-contra-variance"><a class="header" href="#what-are-co--and-contra-variance">What are co- and contra-variance?</a></h2>
<p>Check out the subtyping chapter from the
<a href="https://doc.rust-lang.org/nomicon/subtyping.html">Rust Nomicon</a>.</p>
<p>See the <a href="../variance.html">variance</a> chapter of this guide for more info on how
the type checker handles variance.</p>
<p><a id="free-vs-bound"></a></p>
<h2 id="what-is-a-free-region-or-a-free-variable-what-about-bound-region"><a class="header" href="#what-is-a-free-region-or-a-free-variable-what-about-bound-region">What is a "free region" or a "free variable"? What about "bound region"?</a></h2>
<p>Let's describe the concepts of free vs bound in terms of program
variables, since that's the thing we're most familiar with.</p>
<ul>
<li>Consider this expression, which creates a closure: <code>|a, b| a + b</code>.
Here, the <code>a</code> and <code>b</code> in <code>a + b</code> refer to the arguments that the closure will
be given when it is called. We say that the <code>a</code> and <code>b</code> there are <strong>bound</strong> to
the closure, and that the closure signature <code>|a, b|</code> is a <strong>binder</strong> for the
names <code>a</code> and <code>b</code> (because any references to <code>a</code> or <code>b</code> within refer to the
variables that it introduces).</li>
<li>Consider this expression: <code>a + b</code>. In this expression, <code>a</code> and <code>b</code> refer to
local variables that are defined <em>outside</em> of the expression. We say that
those variables <strong>appear free</strong> in the expression (i.e., they are <strong>free</strong>,
not <strong>bound</strong> (tied up)).</li>
</ul>
<p>So there you have it: a variable "appears free" in some
expression/statement/whatever if it refers to something defined
outside of that expressions/statement/whatever. Equivalently, we can
then refer to the "free variables" of an expression – which is just
the set of variables that "appear free".</p>
<p>So what does this have to do with regions? Well, we can apply the
analogous concept to type and regions. For example, in the type <code>&amp;'a u32</code>, <code>'a</code> appears free. But in the type <code>for&lt;'a&gt; fn(&amp;'a u32)</code>, it
does not.</p>
<h1 id="further-reading-about-compilers"><a class="header" href="#further-reading-about-compilers">Further Reading About Compilers</a></h1>
<blockquote>
<p>Thanks to <code>mem</code>, <code>scottmcm</code>, and <code>Levi</code> on the official Discord for the
recommendations, and to <code>tinaun</code> for posting a link to a <a href="https://web.archive.org/web/20181230012554/https://twitter.com/graydon_pub/status/1039615569132118016">twitter thread from
Graydon Hoare</a>
which had some more recommendations!</p>
<p>Other sources: https://gcc.gnu.org/wiki/ListOfCompilerBooks</p>
<p>If you have other suggestions, please feel free to open an issue or PR.</p>
</blockquote>
<h2 id="books"><a class="header" href="#books">Books</a></h2>
<ul>
<li><a href="https://www.cis.upenn.edu/~bcpierce/tapl/">Types and Programming Languages</a></li>
<li><a href="https://www.cs.rochester.edu/~scott/pragmatics/">Programming Language Pragmatics</a></li>
<li><a href="https://www.cs.cmu.edu/~rwh/pfpl/">Practical Foundations for Programming Languages</a></li>
<li><a href="https://www.pearson.com/us/higher-education/program/Aho-Compilers-Principles-Techniques-and-Tools-2nd-Edition/PGM167067.html">Compilers: Principles, Techniques, and Tools, 2nd Edition</a></li>
<li><a href="https://www.cs.kent.ac.uk/people/staff/rej/gcbook/">Garbage Collection: Algorithms for Automatic Dynamic Memory Management</a></li>
<li><a href="https://www.amazon.com/Linkers-Kaufmann-Software-Engineering-Programming/dp/1558604960">Linkers and Loaders</a> (There are also free versions of this, but the version we had linked seems to be offline at the moment.)</li>
<li><a href="https://www.goodreads.com/book/show/887908.Advanced_Compiler_Design_and_Implementation">Advanced Compiler Design and Implementation</a></li>
<li><a href="https://www.goodreads.com/book/show/2063103.Building_an_Optimizing_Compiler">Building an Optimizing Compiler</a></li>
<li><a href="http://www.craftinginterpreters.com/">Crafting Interpreters</a></li>
</ul>
<h2 id="courses"><a class="header" href="#courses">Courses</a></h2>
<ul>
<li><a href="https://www.cs.uoregon.edu/research/summerschool/archives.html">University of Oregon Programming Languages Summer School archive</a></li>
</ul>
<h2 id="wikis"><a class="header" href="#wikis">Wikis</a></h2>
<ul>
<li><a href="https://en.wikipedia.org/wiki/List_of_programming_languages_by_type">Wikipedia</a></li>
<li><a href="https://esolangs.org/wiki/Main_Page">Esoteric Programming Languages</a></li>
<li><a href="https://plato.stanford.edu/index.html">Stanford Encyclopedia of Philosophy</a></li>
<li><a href="https://ncatlab.org/nlab/show/HomePage">nLab</a></li>
</ul>
<h2 id="misc-papers-and-blog-posts"><a class="header" href="#misc-papers-and-blog-posts">Misc Papers and Blog Posts</a></h2>
<ul>
<li><a href="https://www.cse.chalmers.se/research/group/logic/book/">Programming in Martin-Löf's Type Theory</a></li>
<li><a href="https://dl.acm.org/doi/10.1145/3093333.3009882">Polymorphism, Subtyping, and Type Inference in MLsub</a></li>
</ul>
</main>
<nav class="nav-wrapper" aria-label="Page navigation">
<!-- Mobile navigation buttons -->
<a rel="prev" href="../debugging-support-in-rustc.html" class="mobile-nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
<i class="fa fa-angle-left"></i>
</a>
<a rel="next prefetch" href="../appendix/glossary.html" class="mobile-nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
<i class="fa fa-angle-right"></i>
</a>
<div style="clear: both"></div>
</nav>
</div>
</div>
<nav class="nav-wide-wrapper" aria-label="Page navigation">
<a rel="prev" href="../debugging-support-in-rustc.html" class="nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left">
<i class="fa fa-angle-left"></i>
</a>
<a rel="next prefetch" href="../appendix/glossary.html" class="nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right">
<i class="fa fa-angle-right"></i>
</a>
</nav>
</div>
<script>
window.playground_copyable = true;
</script>
<script src="../elasticlunr.min.js"></script>
<script src="../mark.min.js"></script>
<script src="../searcher.js"></script>
<script src="../clipboard.min.js"></script>
<script src="../highlight.js"></script>
<script src="../book.js"></script>
<!-- Custom JS scripts -->
<script src="../mermaid.min.js"></script>
<script src="../mermaid-init.js"></script>
</div>
</body>
</html>