| <!DOCTYPE HTML> |
| <html lang="en" class="light sidebar-visible" dir="ltr"> |
| <head> |
| <!-- Book generated using mdBook --> |
| <meta charset="UTF-8"> |
| <title>Macro expansion - Rust Compiler Development Guide</title> |
| |
| |
| <!-- Custom HTML head --> |
| |
| <meta name="description" content="A guide to developing the Rust compiler (rustc)"> |
| <meta name="viewport" content="width=device-width, initial-scale=1"> |
| <meta name="theme-color" content="#ffffff"> |
| |
| <link rel="icon" href="favicon.svg"> |
| <link rel="shortcut icon" href="favicon.png"> |
| <link rel="stylesheet" href="css/variables.css"> |
| <link rel="stylesheet" href="css/general.css"> |
| <link rel="stylesheet" href="css/chrome.css"> |
| <link rel="stylesheet" href="css/print.css" media="print"> |
| |
| <!-- Fonts --> |
| <link rel="stylesheet" href="FontAwesome/css/font-awesome.css"> |
| <link rel="stylesheet" href="fonts/fonts.css"> |
| |
| <!-- Highlight.js Stylesheets --> |
| <link rel="stylesheet" id="highlight-css" href="highlight.css"> |
| <link rel="stylesheet" id="tomorrow-night-css" href="tomorrow-night.css"> |
| <link rel="stylesheet" id="ayu-highlight-css" href="ayu-highlight.css"> |
| |
| <!-- Custom theme stylesheets --> |
| |
| |
| <!-- Provide site root and default themes to javascript --> |
| <script> |
| const path_to_root = ""; |
| const default_light_theme = "light"; |
| const default_dark_theme = "navy"; |
| </script> |
| <!-- Start loading toc.js asap --> |
| <script src="toc.js"></script> |
| </head> |
| <body> |
| <div id="body-container"> |
| <!-- Work around some values being stored in localStorage wrapped in quotes --> |
| <script> |
| // Strip one pair of surrounding double quotes from the stored theme and sidebar values. |
| // Each key is handled on its own so that a missing entry (getItem returns null) for one |
| // key does not prevent the other key from being repaired. |
| try { |
| for (const key of ['mdbook-theme', 'mdbook-sidebar']) { |
| const value = localStorage.getItem(key); |
| // Require at least two characters so a lone quote is not rewritten to an empty string. |
| if (value !== null && value.length >= 2 && value.startsWith('"') && value.endsWith('"')) { |
| localStorage.setItem(key, value.slice(1, -1)); |
| } |
| } |
| } catch (e) { |
| // Best effort only: if localStorage cannot be used, leave the stored values untouched. |
| } |
| </script> |
| |
| <!-- Set the theme before any content is loaded, prevents flash --> |
| <script> |
| // Pick the dark default when the browser reports a dark colour-scheme preference. |
| const default_theme = window.matchMedia("(prefers-color-scheme: dark)").matches |
| ? default_dark_theme |
| : default_light_theme; |
| // A theme saved in localStorage takes precedence; if reading it throws, theme stays undefined. |
| let theme; |
| try { |
| theme = localStorage.getItem('mdbook-theme'); |
| } catch (e) { } |
| if (theme == null) { |
| theme = default_theme; |
| } |
| // Swap the "light" class for the chosen theme and mark the document as script-enabled. |
| const html = document.documentElement; |
| html.classList.remove('light'); |
| html.classList.add(theme); |
| html.classList.add("js"); |
| </script> |
| |
| <input type="checkbox" id="sidebar-toggle-anchor" class="hidden"> |
| |
| <!-- Hide / unhide sidebar before it is displayed --> |
| <script> |
| const sidebar_toggle = document.getElementById("sidebar-toggle-anchor"); |
| // Below 1080px of body width the sidebar always starts hidden. |
| let sidebar = 'hidden'; |
| if (document.body.clientWidth >= 1080) { |
| // At 1080px and above, use the stored preference and fall back to visible. |
| let stored = null; |
| try { |
| stored = localStorage.getItem('mdbook-sidebar'); |
| } catch (e) { } |
| sidebar = stored || 'visible'; |
| } |
| // Reflect the decision on the toggle checkbox and on the root element's classes. |
| sidebar_toggle.checked = (sidebar === 'visible'); |
| html.classList.remove('sidebar-visible'); |
| html.classList.add("sidebar-" + sidebar); |
| </script> |
| |
| <nav id="sidebar" class="sidebar" aria-label="Table of contents"> |
| <!-- populated by js --> |
| <mdbook-sidebar-scrollbox class="sidebar-scrollbox"></mdbook-sidebar-scrollbox> |
| <!-- Fallback when scripting is disabled: embed toc.html; the title gives the frame an accessible name --> |
| <noscript> |
| <iframe class="sidebar-iframe-outer" src="toc.html" title="Table of contents"></iframe> |
| </noscript> |
| <div id="sidebar-resize-handle" class="sidebar-resize-handle"> |
| <div class="sidebar-resize-indicator"></div> |
| </div> |
| </nav> |
| |
| <div id="page-wrapper" class="page-wrapper"> |
| |
| <div class="page"> |
| <div id="menu-bar-hover-placeholder"></div> |
| <div id="menu-bar" class="menu-bar sticky"> |
| <!-- Icon glyphs are decorative (aria-hidden); each control is named by its own aria-label --> |
| <div class="left-buttons"> |
| <label id="sidebar-toggle" class="icon-button" for="sidebar-toggle-anchor" title="Toggle Table of Contents" aria-label="Toggle Table of Contents" aria-controls="sidebar"> |
| <i class="fa fa-bars" aria-hidden="true"></i> |
| </label> |
| <button id="theme-toggle" class="icon-button" type="button" title="Change theme" aria-label="Change theme" aria-haspopup="true" aria-expanded="false" aria-controls="theme-list"> |
| <i class="fa fa-paint-brush" aria-hidden="true"></i> |
| </button> |
| <ul id="theme-list" class="theme-popup" aria-label="Themes" role="menu"> |
| <li role="none"><button role="menuitem" class="theme" id="default_theme" type="button">Auto</button></li> |
| <li role="none"><button role="menuitem" class="theme" id="light" type="button">Light</button></li> |
| <li role="none"><button role="menuitem" class="theme" id="rust" type="button">Rust</button></li> |
| <li role="none"><button role="menuitem" class="theme" id="coal" type="button">Coal</button></li> |
| <li role="none"><button role="menuitem" class="theme" id="navy" type="button">Navy</button></li> |
| <li role="none"><button role="menuitem" class="theme" id="ayu" type="button">Ayu</button></li> |
| </ul> |
| <button id="search-toggle" class="icon-button" type="button" title="Search. (Shortkey: s)" aria-label="Toggle Searchbar" aria-expanded="false" aria-keyshortcuts="S" aria-controls="searchbar"> |
| <i class="fa fa-search" aria-hidden="true"></i> |
| </button> |
| </div> |
| |
| <h1 class="menu-title">Rust Compiler Development Guide</h1> |
| |
| <div class="right-buttons"> |
| <a href="print.html" title="Print this book" aria-label="Print this book"> |
| <i id="print-button" class="fa fa-print" aria-hidden="true"></i> |
| </a> |
| <a href="https://github.com/rust-lang/rustc-dev-guide" title="Git repository" aria-label="Git repository"> |
| <i id="git-repository-button" class="fa fa-github" aria-hidden="true"></i> |
| </a> |
| <a href="https://github.com/rust-lang/rustc-dev-guide/edit/master/src/macro-expansion.md" title="Suggest an edit" aria-label="Suggest an edit"> |
| <i id="git-edit-button" class="fa fa-edit" aria-hidden="true"></i> |
| </a> |
| |
| </div> |
| </div> |
| |
| <div id="search-wrapper" class="hidden"> |
| <form id="searchbar-outer" class="searchbar-outer"> |
| <!-- aria-label names the field; the placeholder alone is not a reliable label --> |
| <input type="search" id="searchbar" name="searchbar" placeholder="Search this book ..." aria-label="Search this book" aria-controls="searchresults-outer" aria-describedby="searchresults-header"> |
| </form> |
| <div id="searchresults-outer" class="searchresults-outer hidden"> |
| <div id="searchresults-header" class="searchresults-header"></div> |
| <ul id="searchresults"> |
| </ul> |
| </div> |
| </div> |
| |
| <!-- Apply ARIA attributes after the sidebar and the sidebar toggle button are added to the DOM --> |
| <script> |
| { |
| // Block scope keeps the helper name out of the scope shared with other scripts. |
| const isVisible = sidebar === 'visible'; |
| document.getElementById('sidebar-toggle').setAttribute('aria-expanded', isVisible); |
| document.getElementById('sidebar').setAttribute('aria-hidden', !isVisible); |
| // Sidebar links get tab index 0 while the sidebar is visible and -1 while it is hidden. |
| for (const link of document.querySelectorAll('#sidebar a')) { |
| link.setAttribute('tabIndex', isVisible ? 0 : -1); |
| } |
| } |
| </script> |
| |
| <div id="content" class="content"> |
| <main> |
| <h1 id="macro-expansion"><a class="header" href="#macro-expansion">Macro expansion</a></h1> |
| <ul> |
| <li><a href="#expansion-and-ast-integration">Expansion and AST Integration</a> |
| <ul> |
| <li><a href="#error-recovery">Error Recovery</a></li> |
| <li><a href="#name-resolution">Name Resolution</a></li> |
| <li><a href="#eager-expansion">Eager Expansion</a></li> |
| <li><a href="#other-data-structures">Other Data Structures</a></li> |
| </ul> |
| </li> |
| <li><a href="#hygiene-and-hierarchies">Hygiene and Hierarchies</a> |
| <ul> |
| <li><a href="#the-expansion-order-hierarchy">The Expansion Order Hierarchy</a></li> |
| <li><a href="#the-macro-definition-hierarchy">The Macro Definition Hierarchy</a></li> |
| <li><a href="#the-call-site-hierarchy">The Call-site Hierarchy</a></li> |
| <li><a href="#macro-backtraces">Macro Backtraces</a></li> |
| </ul> |
| </li> |
| <li><a href="#producing-macro-output">Producing Macro Output</a></li> |
| <li><a href="#macros-by-example">Macros By Example</a> |
| <ul> |
| <li><a href="#example">Example</a></li> |
| <li><a href="#the-mbe-parser">The MBE parser</a></li> |
| </ul> |
| </li> |
| <li><a href="#procedural-macros">Procedural Macros</a> |
| <ul> |
| <li><a href="#custom-derive">Custom Derive</a></li> |
| <li><a href="#macros-by-example-and-macros-20">Macros By Example and Macros 2.0</a></li> |
| </ul> |
| </li> |
| </ul> |
| <p>Rust has a very powerful macro system. In the previous chapter, we saw how |
| the parser sets aside macros to be expanded (using temporary <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/placeholders/index.html">placeholders</a>). |
| This chapter is about the process of expanding those macros iteratively until |
| we have a complete <a href="./ast-validation.html"><em>Abstract Syntax Tree</em> (AST)</a> for our crate with no |
| unexpanded macros (or a compile error).</p> |
| <p>First, we discuss the algorithm that expands and integrates macro output into |
| ASTs. Next, we take a look at how hygiene data is collected. Finally, we look |
| at the specifics of expanding different types of macros.</p> |
| <p>Many of the algorithms and data structures described below are in <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/index.html"><code>rustc_expand</code></a>, |
| with fundamental data structures in <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/base/index.html"><code>rustc_expand::base</code></a>.</p> |
| <p>Also of note, <code>cfg</code> and <code>cfg_attr</code> are treated differently from other macros, and are
| handled in <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/config/index.html"><code>rustc_expand::config</code></a>.</p> |
| <h2 id="expansion-and-ast-integration"><a class="header" href="#expansion-and-ast-integration">Expansion and AST Integration</a></h2> |
| <p>Firstly, expansion happens at the crate level. Given the raw source code for
| a crate, the compiler will produce a massive AST with all macros expanded, all |
| modules inlined, etc. The primary entry point for this process is the |
| <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/expand/struct.MacroExpander.html#method.fully_expand_fragment"><code>MacroExpander::fully_expand_fragment</code></a> method. With few exceptions, we |
| use this method on the whole crate (see <a href="#eager-expansion">"Eager Expansion"</a> |
| below for more detailed discussion of edge case expansion issues).</p> |
| <p>At a high level, <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/expand/struct.MacroExpander.html#method.fully_expand_fragment"><code>fully_expand_fragment</code></a> works in iterations. We keep a |
| queue of unresolved macro invocations (i.e. macros we haven't found the |
| definition of yet). We repeatedly try to pick a macro from the queue, resolve |
| it, expand it, and integrate it back. If we can't make progress in an |
| iteration, this represents a compile error. Here is the <a href="https://github.com/rust-lang/rust/pull/53778#issuecomment-419224049">algorithm</a>:</p> |
| <ol> |
| <li>Initialize a <code>queue</code> of unresolved macros.</li> |
| <li>Repeat until <code>queue</code> is empty (or we make no progress, which is an error): |
| <ol> |
| <li><a href="./name-resolution.html">Resolve</a> imports in our partially built crate as |
| much as possible.</li> |
| <li>Collect as many macro <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/expand/struct.Invocation.html"><code>Invocation</code>s</a> as possible from our |
| partially built crate (<code>fn</code>-like, attributes, derives) and add them to the |
| queue.</li> |
| <li>Dequeue the first element and attempt to resolve it.</li> |
| <li>If it's resolved: |
| <ol> |
| <li>Run the macro's expander function that consumes a <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_ast/tokenstream/struct.TokenStream.html"><code>TokenStream</code></a> or |
| AST and produces a <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_ast/tokenstream/struct.TokenStream.html"><code>TokenStream</code></a> or <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/expand/enum.AstFragment.html"><code>AstFragment</code></a> (depending on |
| the macro kind). (A <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_ast/tokenstream/struct.TokenStream.html"><code>TokenStream</code></a> is a collection of <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_ast/tokenstream/enum.TokenTree.html"><code>TokenTree</code>s</a>, |
each of which is either a token (punctuation, identifier, or literal) or a
| delimited group (anything inside <code>()</code>/<code>[]</code>/<code>{}</code>)). |
| <ul> |
| <li>At this point, we know everything about the macro itself and can |
| call <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/hygiene/struct.LocalExpnId.html#method.set_expn_data"><code>set_expn_data</code></a> to fill in its properties in the global |
| data; that is the <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/hygiene/index.html">hygiene</a> data associated with <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/hygiene/struct.ExpnId.html"><code>ExpnId</code></a> (see |
| <a href="#hygiene-and-hierarchies">Hygiene</a> below).</li> |
| </ul> |
| </li> |
| <li>Integrate that piece of AST into the currently-existing though |
| partially-built AST. This is essentially where the "token-like mass" |
| becomes a proper set-in-stone AST with side-tables. It happens as |
| follows: |
| <ul> |
| <li>If the macro produces tokens (e.g. a proc macro), we parse into |
| an AST, which may produce parse errors.</li> |
| <li>During expansion, we create <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/hygiene/struct.SyntaxContext.html"><code>SyntaxContext</code></a>s (hierarchy 2) (see |
| <a href="#hygiene-and-hierarchies">Hygiene</a> below).</li> |
| <li>These three passes happen one after another on every AST fragment |
| freshly expanded from a macro: |
| <ul> |
| <li><a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_ast/node_id/struct.NodeId.html"><code>NodeId</code></a>s are assigned by <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/expand/struct.InvocationCollector.html"><code>InvocationCollector</code></a>. This |
| also collects new macro calls from this new AST piece and |
| adds them to the queue.</li> |
| <li><a href="hir.html#identifiers-in-the-hir">"Def paths"</a> are created and <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_hir/def_id/struct.DefId.html"><code>DefId</code></a>s are |
| assigned to them by <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_resolve/def_collector/struct.DefCollector.html"><code>DefCollector</code></a>.</li> |
| <li>Names are put into modules (from the resolver's point of |
| view) by <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_resolve/build_reduced_graph/struct.BuildReducedGraphVisitor.html"><code>BuildReducedGraphVisitor</code></a>.</li> |
| </ul> |
| </li> |
| </ul> |
| </li> |
| <li>After expanding a single macro and integrating its output, continue |
| to the next iteration of <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/expand/struct.MacroExpander.html#method.fully_expand_fragment"><code>fully_expand_fragment</code></a>.</li> |
| </ol> |
| </li> |
| <li>If it's not resolved: |
| <ol> |
| <li>Put the macro back in the queue.</li> |
| <li>Continue to next iteration...</li> |
| </ol> |
| </li> |
| </ol> |
| </li> |
| </ol> |
| <h3 id="error-recovery"><a class="header" href="#error-recovery">Error Recovery</a></h3> |
| <p>If we make no progress in an iteration we have reached a compilation error |
| (e.g. an undefined macro). We attempt to recover from failures (i.e. |
| unresolved macros or imports) with the intent of generating diagnostics. |
| Failure recovery happens by expanding unresolved macros into |
<a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_ast/ast/enum.ExprKind.html#variant.Err"><code>ExprKind::Err</code></a>, which allows compilation to continue past the first error
| so that <code>rustc</code> can report more errors than just the original failure.</p> |
| <h3 id="name-resolution"><a class="header" href="#name-resolution">Name Resolution</a></h3> |
| <p>Notice that name resolution is involved here: we need to resolve imports and |
| macro names in the above algorithm. This is done in |
| <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_resolve/macros/index.html"><code>rustc_resolve::macros</code></a>, which resolves macro paths, validates |
| those resolutions, and reports various errors (e.g. "not found", "found, but |
| it's unstable", "expected x, found y"). However, we don't try to resolve |
| other names yet. This happens later, as we will see in the chapter: <a href="./name-resolution.html">Name |
| Resolution</a>.</p> |
| <h3 id="eager-expansion"><a class="header" href="#eager-expansion">Eager Expansion</a></h3> |
| <p><em>Eager expansion</em> means we expand the arguments of a macro invocation before |
| the macro invocation itself. This is implemented only for a few special |
| built-in macros that expect literals; expanding arguments first for some of |
these macros results in a smoother user experience. As an example, consider
| the following:</p> |
| <pre><code class="language-rust ignore">macro bar($i: ident) { $i } |
| macro foo($i: ident) { $i } |
| |
| foo!(bar!(baz));</code></pre> |
| <p>A lazy-expansion would expand <code>foo!</code> first. An eager-expansion would expand |
| <code>bar!</code> first.</p> |
| <p>Eager-expansion is not a generally available feature of Rust. Implementing |
| eager-expansion more generally would be challenging, so we implement it for a |
| few special built-in macros for the sake of user-experience. The built-in |
| macros are implemented in <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_builtin_macros/index.html"><code>rustc_builtin_macros</code></a>, along with some other |
| early code generation facilities like injection of standard library imports or |
generation of the test harness. There are some additional helpers for building
| AST fragments in <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/build/index.html"><code>rustc_expand::build</code></a>. Eager-expansion generally |
| performs a subset of the things that lazy (normal) expansion does. It is done |
| by invoking <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/expand/struct.MacroExpander.html#method.fully_expand_fragment"><code>fully_expand_fragment</code></a> on only part of a crate (as opposed |
| to the whole crate, like we normally do).</p> |
| <h3 id="other-data-structures"><a class="header" href="#other-data-structures">Other Data Structures</a></h3> |
| <p>Here are some other notable data structures involved in expansion and |
| integration:</p> |
| <ul> |
| <li><a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/base/trait.ResolverExpand.html"><code>ResolverExpand</code></a> - a <code>trait</code> used to break crate dependencies. This allows the |
| resolver services to be used in <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_ast/index.html"><code>rustc_ast</code></a>, despite <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_resolve/index.html"><code>rustc_resolve</code></a> and |
| pretty much everything else depending on <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_ast/index.html"><code>rustc_ast</code></a>.</li> |
| <li><a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/base/struct.ExtCtxt.html"><code>ExtCtxt</code></a>/<a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/base/struct.ExpansionData.html"><code>ExpansionData</code></a> - holds various intermediate expansion |
| infrastructure data.</li> |
| <li><a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/base/enum.Annotatable.html"><code>Annotatable</code></a> - a piece of AST that can be an attribute target, almost the same |
| thing as <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/expand/enum.AstFragment.html"><code>AstFragment</code></a> except for types and patterns that can be produced by |
| macros but cannot be annotated with attributes.</li> |
| <li><a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/base/trait.MacResult.html"><code>MacResult</code></a> - a "polymorphic" AST fragment, something that can turn into |
| a different <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/expand/enum.AstFragment.html"><code>AstFragment</code></a> depending on its <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/expand/enum.AstFragmentKind.html"><code>AstFragmentKind</code></a> (i.e. an item, |
| expression, pattern, etc).</li> |
| </ul> |
| <h2 id="hygiene-and-hierarchies"><a class="header" href="#hygiene-and-hierarchies">Hygiene and Hierarchies</a></h2> |
| <p>If you have ever used the C/C++ preprocessor macros, you know that there are some |
| annoying and hard-to-debug gotchas! For example, consider the following C code:</p> |
| <pre><code class="language-c">#define DEFINE_FOO struct Bar {int x;}; struct Foo {Bar bar;}; |
| |
| // Then, somewhere else |
| struct Bar { |
| ... |
| }; |
| |
| DEFINE_FOO |
| </code></pre> |
| <p>Most people avoid writing C like this – and for good reason: it doesn't |
compile. The <code>struct Bar</code> defined by the macro clashes with the <code>struct Bar</code> defined in the code. Consider also the following example:</p> |
| <pre><code class="language-c">#define DO_FOO(x) {\ |
| int y = 0;\ |
| foo(x, y);\ |
| } |
| |
| // Then elsewhere |
| int y = 22; |
| DO_FOO(y); |
| </code></pre> |
| <p>Do you see the problem? We wanted to generate a call <code>foo(22, 0)</code>, but instead |
| we got <code>foo(0, 0)</code> because the macro defined its own <code>y</code>!</p> |
| <p>These are both examples of <em>macro hygiene</em> issues. <em>Hygiene</em> relates to how to |
| handle names defined <em>within a macro</em>. In particular, a hygienic macro system |
| prevents errors due to names introduced within a macro. Rust macros are hygienic |
| in that they do not allow one to write the sorts of bugs above.</p> |
| <p>At a high level, hygiene within the Rust compiler is accomplished by keeping |
| track of the context where a name is introduced and used. We can then |
| disambiguate names based on that context. Future iterations of the macro system |
| will allow greater control to the macro author to use that context. For example, |
| a macro author may want to introduce a new name to the context where the macro |
| was called. Alternately, the macro author may be defining a variable for use |
| only within the macro (i.e. it should not be visible outside the macro).</p> |
| <p>The context is attached to AST nodes. All AST nodes generated by macros have |
| context attached. Additionally, there may be other nodes that have context |
| attached, such as some desugared syntax (non-macro-expanded nodes are |
| considered to just have the "root" context, as described below). |
| Throughout the compiler, we use <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/struct.Span.html"><code>rustc_span::Span</code>s</a> to refer to code locations. |
| This struct also has hygiene information attached to it, as we will see later.</p> |
| <p>Because macro invocations and definitions can be nested, the syntax context of
| a node must be a hierarchy. For example, if we expand a macro and there is |
| another macro invocation or definition in the generated output, then the syntax |
| context should reflect the nesting.</p> |
| <p>However, it turns out that there are actually a few types of context we may |
| want to track for different purposes. Thus, there are not just one but <em>three</em> |
| expansion hierarchies that together comprise the hygiene information for a |
| crate.</p> |
| <p>All of these hierarchies need some sort of "macro ID" to identify individual |
| elements in the chain of expansions. This ID is <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/hygiene/struct.ExpnId.html"><code>ExpnId</code></a>. All macros receive |
| an integer ID, assigned continuously starting from 0 as we discover new macro |
| calls. All hierarchies start at <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/hygiene/struct.ExpnId.html#method.root"><code>ExpnId::root</code></a>, which is its own |
| parent.</p> |
| <p>The <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/hygiene/index.html"><code>rustc_span::hygiene</code></a> module contains all of the hygiene-related algorithms
| (with the exception of some hacks in <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_resolve/struct.Resolver.html#method.resolve_crate_root"><code>Resolver::resolve_crate_root</code></a>) |
| and structures related to hygiene and expansion that are kept in global data.</p> |
| <p>The actual hierarchies are stored in <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/hygiene/struct.HygieneData.html"><code>HygieneData</code></a>. This is a global |
| piece of data containing hygiene and expansion info that can be accessed from |
| any <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/symbol/struct.Ident.html"><code>Ident</code></a> without any context.</p> |
| <h3 id="the-expansion-order-hierarchy"><a class="header" href="#the-expansion-order-hierarchy">The Expansion Order Hierarchy</a></h3> |
| <p>The first hierarchy tracks the order of expansions, i.e., when a macro |
| invocation is in the output of another macro.</p> |
| <p>Here, the children in the hierarchy will be the "innermost" tokens. The |
| <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/hygiene/struct.ExpnData.html"><code>ExpnData</code></a> struct itself contains a subset of properties from both macro |
| definition and macro call available through global data. |
| <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/hygiene/struct.ExpnData.html#structfield.parent"><code>ExpnData::parent</code></a> tracks the child-to-parent link in this hierarchy.</p> |
| <p>For example:</p> |
| <pre><code class="language-rust ignore">macro_rules! foo { () => { println!(); } } |
| |
| fn main() { foo!(); }</code></pre> |
| <p>In this code, the AST nodes that are finally generated would have hierarchy |
| <code>root -> id(foo) -> id(println)</code>.</p> |
| <h3 id="the-macro-definition-hierarchy"><a class="header" href="#the-macro-definition-hierarchy">The Macro Definition Hierarchy</a></h3> |
| <p>The second hierarchy tracks the order of macro definitions, i.e., when
expanding one macro reveals another macro definition in its output. This
| one is a bit tricky and more complex than the other two hierarchies.</p> |
| <p><a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/hygiene/struct.SyntaxContext.html"><code>SyntaxContext</code></a> represents a whole chain in this hierarchy via an ID. |
| <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/hygiene/struct.SyntaxContextData.html"><code>SyntaxContextData</code></a> contains data associated with the given |
| <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/hygiene/struct.SyntaxContext.html"><code>SyntaxContext</code></a>; mostly it is a cache for results of filtering that chain in |
| different ways. <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/hygiene/struct.SyntaxContextData.html#structfield.parent"><code>SyntaxContextData::parent</code></a> is the child-to-parent |
link here, and <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/hygiene/struct.SyntaxContextData.html#structfield.outer_expn"><code>SyntaxContextData::outer_expn</code>s</a> are individual
| elements in the chain. The "chaining-operator" is |
| <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/hygiene/struct.SyntaxContext.html#method.apply_mark"><code>SyntaxContext::apply_mark</code></a> in compiler code.</p> |
| <p>A <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/struct.Span.html"><code>Span</code></a>, mentioned above, is actually just a compact representation of |
| a code location and <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/hygiene/struct.SyntaxContext.html"><code>SyntaxContext</code></a>. Likewise, an <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/symbol/struct.Ident.html"><code>Ident</code></a> is just an interned |
| <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/symbol/struct.Symbol.html"><code>Symbol</code></a> + <code>Span</code> (i.e. an interned string + hygiene data).</p> |
| <p>For built-in macros, we use the context: |
| <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/hygiene/struct.SyntaxContext.html#method.apply_mark"><code>SyntaxContext::empty().apply_mark(expn_id)</code></a>, and such macros are |
| considered to be defined at the hierarchy root. We do the same for <code>proc macro</code>s because we haven't implemented cross-crate hygiene yet.</p> |
| <p>If the token had context <code>X</code> before being produced by a macro then after being |
| produced by the macro it has context <code>X -> macro_id</code>. Here are some examples:</p> |
| <p>Example 0:</p> |
| <pre><code class="language-rust ignore">macro m() { ident } |
| |
| m!();</code></pre> |
| <p>Here <code>ident</code> which initially has context <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/hygiene/struct.SyntaxContext.html#method.root"><code>SyntaxContext::root</code></a> has |
| context <code>ROOT -> id(m)</code> after it's produced by <code>m</code>.</p> |
| <p>Example 1:</p> |
| <pre><code class="language-rust ignore">macro m() { macro n() { ident } } |
| |
| m!(); |
| n!();</code></pre> |
| <p>In this example the <code>ident</code> has context <code>ROOT</code> initially, then <code>ROOT -> id(m)</code> |
| after the first expansion, then <code>ROOT -> id(m) -> id(n)</code>.</p> |
| <p>Example 2:</p> |
| <p>Note that these chains are not entirely determined by their last element, in |
| other words <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/hygiene/struct.ExpnId.html"><code>ExpnId</code></a> is not isomorphic to <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/hygiene/struct.SyntaxContext.html"><code>SyntaxContext</code></a>.</p> |
| <pre><code class="language-rust ignore">macro m($i: ident) { macro n() { ($i, bar) } } |
| |
| m!(foo);</code></pre> |
| <p>After all expansions, <code>foo</code> has context <code>ROOT -> id(n)</code> and <code>bar</code> has context |
| <code>ROOT -> id(m) -> id(n)</code>.</p> |
| <p>Currently this hierarchy for tracking macro definitions is subject to the |
| so-called <a href="https://github.com/rust-lang/rust/pull/51762#issuecomment-401400732">"context transplantation hack"</a>. Modern (i.e. experimental) |
| macros have stronger hygiene than the legacy "Macros By Example" (MBE) |
| system which can result in weird interactions between the two. The hack is |
| intended to make things "just work" for now.</p> |
| <h3 id="the-call-site-hierarchy"><a class="header" href="#the-call-site-hierarchy">The Call-site Hierarchy</a></h3> |
| <p>The third and final hierarchy tracks the location of macro invocations.</p> |
| <p>In this hierarchy <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/hygiene/struct.ExpnData.html#structfield.call_site"><code>ExpnData::call_site</code></a> is the <code>child -> parent</code> |
| link.</p> |
| <p>Here is an example:</p> |
| <pre><code class="language-rust ignore">macro bar($i: ident) { $i } |
| macro foo($i: ident) { $i } |
| |
| foo!(bar!(baz));</code></pre> |
| <p>For the <code>baz</code> AST node in the final output, the expansion-order hierarchy is |
| <code>ROOT -> id(foo) -> id(bar) -> baz</code>, while the call-site hierarchy is <code>ROOT -> baz</code>.</p> |
| <h3 id="macro-backtraces"><a class="header" href="#macro-backtraces">Macro Backtraces</a></h3> |
| <p>Macro backtraces are implemented in <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/index.html"><code>rustc_span</code></a> using the hygiene machinery |
| in <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_span/hygiene/index.html"><code>rustc_span::hygiene</code></a>.</p> |
| <h2 id="producing-macro-output"><a class="header" href="#producing-macro-output">Producing Macro Output</a></h2> |
| <p>Above, we saw how the output of a macro is integrated into the AST for a crate, |
| and we also saw how the hygiene data for a crate is generated. But how do we |
| actually produce the output of a macro? It depends on the type of macro.</p> |
| <p>There are two types of macros in Rust:</p> |
| <ol> |
| <li><code>macro_rules!</code> macros (a.k.a. "Macros By Example" (MBE)), and,</li> |
| <li>procedural macros (proc macros), including custom derives.</li> |
| </ol> |
| <p>During the parsing phase, the normal Rust parser will set aside the contents of |
| macros and their invocations. Later, macros are expanded using these |
| portions of the code.</p> |
| <p>Some important data structures/interfaces here:</p> |
| <ul> |
| <li><a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/base/struct.SyntaxExtension.html"><code>SyntaxExtension</code></a> - a lowered macro representation, contains its expander |
| function, which transforms a <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_ast/tokenstream/struct.TokenStream.html"><code>TokenStream</code></a> or AST into another |
| <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_ast/tokenstream/struct.TokenStream.html"><code>TokenStream</code></a> or AST + some additional data like stability, or a list of |
| unstable features allowed inside the macro.</li> |
| <li><a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/base/enum.SyntaxExtensionKind.html"><code>SyntaxExtensionKind</code></a> - expander functions may have several different |
| signatures (take one token stream, or two, or a piece of AST, etc). This is |
| an <code>enum</code> that lists them.</li> |
| <li><a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/base/trait.BangProcMacro.html"><code>BangProcMacro</code></a>/<a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/base/trait.TTMacroExpander.html"><code>TTMacroExpander</code></a>/<a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/base/trait.AttrProcMacro.html"><code>AttrProcMacro</code></a>/<a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/base/trait.MultiItemModifier.html"><code>MultiItemModifier</code></a> - |
| <code>trait</code>s representing the expander function signatures.</li> |
| </ul> |
| <h2 id="macros-by-example"><a class="header" href="#macros-by-example">Macros By Example</a></h2> |
| <p>MBEs have their own parser distinct from the Rust parser. When macros are |
| expanded, we may invoke the MBE parser to parse and expand a macro. The |
| MBE parser, in turn, may call the Rust parser when it needs to bind a |
| metavariable (e.g. <code>$my_expr</code>) while parsing the contents of a macro |
| invocation. The code for macro expansion is in |
| <a href="https://github.com/rust-lang/rust/tree/master/compiler/rustc_expand/src/mbe"><code>compiler/rustc_expand/src/mbe/</code></a>.</p> |
| <h3 id="example"><a class="header" href="#example">Example</a></h3> |
| <pre><code class="language-rust ignore">macro_rules! printer { |
| (print $mvar:ident) => { |
| println!("{}", $mvar); |
| }; |
| (print twice $mvar:ident) => { |
| println!("{}", $mvar); |
| println!("{}", $mvar); |
| }; |
| }</code></pre> |
| <p>Here <code>$mvar</code> is called a <em>metavariable</em>. Unlike normal variables, rather than |
| binding to a value <em>at runtime</em>, a metavariable binds <em>at compile time</em> to a |
| tree of <em>tokens</em>. A <em>token</em> is a single "unit" of the grammar, such as an |
| identifier (e.g. <code>foo</code>) or punctuation (e.g. <code>=></code>). There are also other |
| special tokens, such as <code>EOF</code>, which itself indicates that there are no more |
| tokens. There are token trees resulting from the paired parentheses-like |
| characters (<code>(</code>...<code>)</code>, <code>[</code>...<code>]</code>, and <code>{</code>...<code>}</code>) – they include the open and |
| close and all the tokens in between (Rust requires that parentheses-like |
| characters be balanced). Having macro expansion operate on token streams |
| rather than the raw bytes of a source-file abstracts away a lot of complexity. |
| The macro expander (and much of the rest of the compiler) doesn't consider |
| the exact line and column of some syntactic construct in the code; it considers |
| which constructs are used in the code. Using tokens allows us to care about |
| <em>what</em> without worrying about <em>where</em>. For more information about tokens, see |
| the <a href="./the-parser.html">Parsing</a> chapter of this book.</p> |
| <pre><code class="language-rust ignore">printer!(print foo); // `foo` is a variable</code></pre> |
| <p>The process of expanding the macro invocation into the syntax tree |
| <code>println!("{}", foo)</code> and then expanding the syntax tree into a call to |
| <code>Display::fmt</code> is one common example of <em>macro expansion</em>.</p> |
| <h3 id="the-mbe-parser"><a class="header" href="#the-mbe-parser">The MBE parser</a></h3> |
| <p>There are two parts to MBE expansion done by the macro parser:</p> |
| <ol> |
| <li>parsing the definition, and,</li> |
| <li>parsing the invocations.</li> |
| </ol> |
| <p>We think of the MBE parser as a nondeterministic finite automaton (NFA) based |
| regex parser since it uses an algorithm similar in spirit to the <a href="https://en.wikipedia.org/wiki/Earley_parser">Earley |
| parsing algorithm</a>. The macro |
| parser is defined in |
| <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/mbe/macro_parser"><code>compiler/rustc_expand/src/mbe/macro_parser.rs</code></a>.</p> |
| <p>The interface of the macro parser is as follows (this is slightly simplified):</p> |
| <pre><code class="language-rust ignore">fn parse_tt( |
| &amp;mut self, |
| parser: &amp;mut Cow&lt;'_, Parser&lt;'_&gt;&gt;, |
| matcher: &amp;[MatcherLoc] |
| ) -> ParseResult</code></pre> |
| <p>We use these items in the macro parser:</p> |
| <ul> |
| <li>a <code>parser</code> variable is a reference to the state of a normal Rust parser, |
| including the token stream and parsing session. The token stream is what we |
| are about to ask the MBE parser to parse. We will consume the raw stream of |
| tokens and output a binding of metavariables to corresponding token trees. |
| The parsing session can be used to report parser errors.</li> |
| <li>a <code>matcher</code> variable is a sequence of <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/mbe/macro_parser/enum.MatcherLoc.html"><code>MatcherLoc</code></a>s that we want to match |
| the token stream against. They're converted from token trees before matching.</li> |
| </ul> |
| <p>In the analogy of a regex parser, the token stream is the input and we are |
| matching it against the pattern defined by matcher. Using our examples, the |
| token stream could be the stream of tokens containing the inside of the example |
| invocation <code>print foo</code>, while matcher might be the sequence of token (trees) |
| <code>print $mvar:ident</code>.</p> |
| <p>The output of the parser is a <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/mbe/macro_parser/enum.ParseResult.html"><code>ParseResult</code></a>, which indicates which of |
| three cases has occurred:</p> |
| <ul> |
| <li><strong>Success</strong>: the token stream matches the given matcher and we have produced a |
| binding from metavariables to the corresponding token trees.</li> |
| <li><strong>Failure</strong>: the token stream does not match matcher and results in an error |
| message such as "No rule expected token ...".</li> |
| <li><strong>Error</strong>: some fatal error has occurred <em>in the parser</em>. For example, this |
| happens if there is more than one pattern match, since that indicates the |
| macro is ambiguous.</li> |
| </ul> |
| <p>The full interface is defined <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/mbe/macro_parser/struct.TtParser.html#method.parse_tt">here</a>.</p> |
| <p>The macro parser does pretty much exactly the same as a normal regex parser |
| with one exception: in order to parse different types of metavariables, such as |
| <code>ident</code>, <code>block</code>, <code>expr</code>, etc., the macro parser must call back to the normal |
| Rust parser. Both the definition and invocation of macros are parsed using |
| the parser in a process which is non-intuitively self-referential.</p> |
| <p>The code to parse macro <em>definitions</em> is in |
| <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/mbe/macro_rules"><code>compiler/rustc_expand/src/mbe/macro_rules.rs</code></a>. It defines the |
| pattern for matching a macro definition as <code>$( $lhs:tt => $rhs:tt );+</code>. In |
| other words, a <code>macro_rules</code> definition should have in its body at least one |
| occurrence of a token tree followed by <code>=></code> followed by another token tree. |
| When the compiler comes to a <code>macro_rules</code> definition, it uses this pattern to |
| match the two token trees per the rules of the definition of the macro, <em>thereby |
| utilizing the macro parser itself</em>. In our example definition, the |
| metavariable <code>$lhs</code> would match the patterns of both arms: <code>(print $mvar:ident)</code> and <code>(print twice $mvar:ident)</code>. And <code>$rhs</code> would match the |
| bodies of both arms: <code>{ println!("{}", $mvar); }</code> and <code>{ println!("{}", $mvar); println!("{}", $mvar); }</code>. The parser keeps this knowledge around for when it |
| needs to expand a macro invocation.</p> |
| <p>When the compiler comes to a macro invocation, it parses that invocation using |
| the NFA-based macro parser described above. However, the matcher variable |
| used is the first token tree (<code>$lhs</code>) extracted from the arms of the macro |
| <em>definition</em>. Using our example, we would try to match the token stream <code>print foo</code> from the invocation against the matchers <code>print $mvar:ident</code> and <code>print twice $mvar:ident</code> that we previously extracted from the definition. The |
| algorithm is exactly the same, but when the macro parser comes to a place in the |
| current matcher where it needs to match a <em>non-terminal</em> (e.g. <code>$mvar:ident</code>), |
| it calls back to the normal Rust parser to get the contents of that |
| non-terminal. In this case, the Rust parser would look for an <code>ident</code> token, |
| which it finds (<code>foo</code>) and returns to the macro parser. Then, the macro parser |
| proceeds in parsing as normal. Also, note that exactly one of the matchers from |
| the various arms should match the invocation; if there is more than one match, |
| the parse is ambiguous, while if there are no matches at all, there is a syntax |
| error.</p> |
| <p>For more information about the macro parser's implementation, see the comments |
| in <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/mbe/macro_parser"><code>compiler/rustc_expand/src/mbe/macro_parser.rs</code></a>.</p> |
| <h2 id="procedural-macros"><a class="header" href="#procedural-macros">Procedural Macros</a></h2> |
| <p>Procedural macros are also expanded during parsing. However, rather than |
| having a parser in the compiler, proc macros are implemented as custom, |
| third-party crates. The compiler will compile the proc macro crate and |
| specially annotated functions in it (i.e. the proc macro itself), passing |
| them a stream of tokens. A proc macro can then transform the token stream and |
| output a new token stream, which is synthesized into the AST.</p> |
| <p>The token stream type used by proc macros is <em>stable</em>, so <code>rustc</code> does not |
| use it internally. The compiler's (unstable) token stream is defined in |
| <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_ast/tokenstream/struct.TokenStream.html"><code>rustc_ast::tokenstream::TokenStream</code></a>. This is converted into the |
| stable <a href="https://doc.rust-lang.org/proc_macro/struct.TokenStream.html"><code>proc_macro::TokenStream</code></a> and back in |
| <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/proc_macro/index.html"><code>rustc_expand::proc_macro</code></a> and <a href="https://doc.rust-lang.org/nightly/nightly-rustc/rustc_expand/proc_macro_server/index.html"><code>rustc_expand::proc_macro_server</code></a>. |
| Since the Rust ABI is currently unstable, we use the C ABI for this conversion.</p> |
| <!-- TODO(rylev): more here. [#1160](https://github.com/rust-lang/rustc-dev-guide/issues/1160) --> |
| <h3 id="custom-derive"><a class="header" href="#custom-derive">Custom Derive</a></h3> |
| <p>Custom derives are a special type of proc macro.</p> |
| <h3 id="macros-by-example-and-macros-20"><a class="header" href="#macros-by-example-and-macros-20">Macros By Example and Macros 2.0</a></h3> |
| <p>There is a legacy and mostly undocumented effort to improve the MBE system |
| by giving it more hygiene-related features, better scoping and visibility |
| rules, etc. Internally this uses the same machinery as today's MBEs with some |
| additional syntactic sugar, and such macros are allowed to be in namespaces.</p> |
| <!-- TODO(rylev): more? [#1160](https://github.com/rust-lang/rustc-dev-guide/issues/1160) --> |
| |
| </main> |
| |
| <nav class="nav-wrapper" aria-label="Page navigation"> |
| <!-- Mobile navigation buttons --> |
| <a rel="prev" href="the-parser.html" class="mobile-nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left"> |
| <i class="fa fa-angle-left"></i> |
| </a> |
| |
| <a rel="next prefetch" href="name-resolution.html" class="mobile-nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right"> |
| <i class="fa fa-angle-right"></i> |
| </a> |
| |
| <div style="clear: both"></div> |
| </nav> |
| </div> |
| </div> |
| |
| <nav class="nav-wide-wrapper" aria-label="Page navigation"> |
| <a rel="prev" href="the-parser.html" class="nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left"> |
| <i class="fa fa-angle-left"></i> |
| </a> |
| |
| <a rel="next prefetch" href="name-resolution.html" class="nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right"> |
| <i class="fa fa-angle-right"></i> |
| </a> |
| </nav> |
| |
| </div> |
| |
| |
| |
| |
| <script> |
| window.playground_copyable = true; |
| </script> |
| |
| |
| <script src="elasticlunr.min.js"></script> |
| <script src="mark.min.js"></script> |
| <script src="searcher.js"></script> |
| |
| <script src="clipboard.min.js"></script> |
| <script src="highlight.js"></script> |
| <script src="book.js"></script> |
| |
| <!-- Custom JS scripts --> |
| <script src="mermaid.min.js"></script> |
| <script src="mermaid-init.js"></script> |
| |
| |
| </div> |
| </body> |
| </html> |