| <!DOCTYPE HTML> |
| <html lang="en" class="light sidebar-visible" dir="ltr"> |
| <head> |
| <!-- Book generated using mdBook --> |
| <meta charset="UTF-8"> |
| <title>Optimizations: The speed size tradeoff - The Embedded Rust Book</title> |
| |
| |
| <!-- Custom HTML head --> |
| |
| <meta name="description" content=""> |
| <meta name="viewport" content="width=device-width, initial-scale=1"> |
| <meta name="theme-color" content="#ffffff"> |
| |
| <link rel="icon" href="../favicon.svg"> |
| <link rel="shortcut icon" href="../favicon.png"> |
| <link rel="stylesheet" href="../css/variables.css"> |
| <link rel="stylesheet" href="../css/general.css"> |
| <link rel="stylesheet" href="../css/chrome.css"> |
| <link rel="stylesheet" href="../css/print.css" media="print"> |
| |
| <!-- Fonts --> |
| <link rel="stylesheet" href="../FontAwesome/css/font-awesome.css"> |
| <link rel="stylesheet" href="../fonts/fonts.css"> |
| |
| <!-- Highlight.js Stylesheets --> |
| <link rel="stylesheet" id="highlight-css" href="../highlight.css"> |
| <link rel="stylesheet" id="tomorrow-night-css" href="../tomorrow-night.css"> |
| <link rel="stylesheet" id="ayu-highlight-css" href="../ayu-highlight.css"> |
| |
| <!-- Custom theme stylesheets --> |
| |
| |
| <!-- Provide site root and default themes to javascript --> |
| <script> |
| // Relative path from this page to the site root. |
| const path_to_root = "../"; |
| // Default theme names; the inline theme script further down picks one of them |
| // (based on the prefers-color-scheme media query) when no theme is stored. |
| const default_light_theme = "light"; |
| const default_dark_theme = "navy"; |
| </script> |
| <!-- Start loading toc.js asap --> |
| <script src="../toc.js"></script> |
| </head> |
| <body> |
| <div id="mdbook-help-container"> |
| <div id="mdbook-help-popup"> |
| <h2 class="mdbook-help-title">Keyboard shortcuts</h2> |
| <div> |
| <p>Press <kbd>←</kbd> or <kbd>→</kbd> to navigate between chapters</p> |
| <p>Press <kbd>S</kbd> or <kbd>/</kbd> to search in the book</p> |
| <p>Press <kbd>?</kbd> to show this help</p> |
| <p>Press <kbd>Esc</kbd> to hide this help</p> |
| </div> |
| </div> |
| </div> |
| <div id="body-container"> |
| <!-- Work around some values being stored in localStorage wrapped in quotes --> |
| <script> |
| try { |
| let theme = localStorage.getItem('mdbook-theme'); |
| let sidebar = localStorage.getItem('mdbook-sidebar'); |
| |
| if (theme.startsWith('"') && theme.endsWith('"')) { |
| localStorage.setItem('mdbook-theme', theme.slice(1, theme.length - 1)); |
| } |
| |
| if (sidebar.startsWith('"') && sidebar.endsWith('"')) { |
| localStorage.setItem('mdbook-sidebar', sidebar.slice(1, sidebar.length - 1)); |
| } |
| } catch (e) { } |
| </script> |
| |
| <!-- Set the theme before any content is loaded, prevents flash --> |
| <script> |
| const default_theme = window.matchMedia("(prefers-color-scheme: dark)").matches ? default_dark_theme : default_light_theme; |
| let theme; |
| try { theme = localStorage.getItem('mdbook-theme'); } catch(e) { } |
| if (theme === null || theme === undefined) { theme = default_theme; } |
| const html = document.documentElement; |
| html.classList.remove('light') |
| html.classList.add(theme); |
| html.classList.add("js"); |
| </script> |
| |
| <input type="checkbox" id="sidebar-toggle-anchor" class="hidden"> |
| |
| <!-- Hide / unhide sidebar before it is displayed --> |
| <script> |
| let sidebar = null; |
| const sidebar_toggle = document.getElementById("sidebar-toggle-anchor"); |
| if (document.body.clientWidth >= 1080) { |
| try { sidebar = localStorage.getItem('mdbook-sidebar'); } catch(e) { } |
| sidebar = sidebar || 'visible'; |
| } else { |
| sidebar = 'hidden'; |
| } |
| sidebar_toggle.checked = sidebar === 'visible'; |
| html.classList.remove('sidebar-visible'); |
| html.classList.add("sidebar-" + sidebar); |
| </script> |
| |
| <nav id="sidebar" class="sidebar" aria-label="Table of contents"> |
| <!-- populated by js --> |
| <mdbook-sidebar-scrollbox class="sidebar-scrollbox"></mdbook-sidebar-scrollbox> |
| <noscript> |
| <iframe class="sidebar-iframe-outer" src="../toc.html" title="Table of contents"></iframe> |
| </noscript> |
| <div id="sidebar-resize-handle" class="sidebar-resize-handle"> |
| <div class="sidebar-resize-indicator"></div> |
| </div> |
| </nav> |
| |
| <div id="page-wrapper" class="page-wrapper"> |
| |
| <div class="page"> |
| <div id="menu-bar-hover-placeholder"></div> |
| <div id="menu-bar" class="menu-bar sticky"> |
| <div class="left-buttons"> |
| <label id="sidebar-toggle" class="icon-button" for="sidebar-toggle-anchor" title="Toggle Table of Contents" aria-label="Toggle Table of Contents" aria-controls="sidebar"> |
| <i class="fa fa-bars"></i> |
| </label> |
| <button id="theme-toggle" class="icon-button" type="button" title="Change theme" aria-label="Change theme" aria-haspopup="true" aria-expanded="false" aria-controls="theme-list"> |
| <i class="fa fa-paint-brush"></i> |
| </button> |
| <ul id="theme-list" class="theme-popup" aria-label="Themes" role="menu"> |
| <li role="none"><button role="menuitem" class="theme" id="default_theme">Auto</button></li> |
| <li role="none"><button role="menuitem" class="theme" id="light">Light</button></li> |
| <li role="none"><button role="menuitem" class="theme" id="rust">Rust</button></li> |
| <li role="none"><button role="menuitem" class="theme" id="coal">Coal</button></li> |
| <li role="none"><button role="menuitem" class="theme" id="navy">Navy</button></li> |
| <li role="none"><button role="menuitem" class="theme" id="ayu">Ayu</button></li> |
| </ul> |
| <button id="search-toggle" class="icon-button" type="button" title="Search (`/`)" aria-label="Toggle Searchbar" aria-expanded="false" aria-keyshortcuts="/ s" aria-controls="searchbar"> |
| <i class="fa fa-search"></i> |
| </button> |
| </div> |
| |
| <h1 class="menu-title">The Embedded Rust Book</h1> |
| |
| <div class="right-buttons"> |
| <a href="../print.html" title="Print this book" aria-label="Print this book"> |
| <i id="print-button" class="fa fa-print"></i> |
| </a> |
| <a href="https://github.com/rust-embedded/book" title="Git repository" aria-label="Git repository"> |
| <i id="git-repository-button" class="fa fa-github"></i> |
| </a> |
| |
| </div> |
| </div> |
| |
| <div id="search-wrapper" class="hidden"> |
| <form id="searchbar-outer" class="searchbar-outer"> |
| <input type="search" id="searchbar" name="searchbar" placeholder="Search this book ..." aria-controls="searchresults-outer" aria-describedby="searchresults-header"> |
| </form> |
| <div id="searchresults-outer" class="searchresults-outer hidden"> |
| <div id="searchresults-header" class="searchresults-header"></div> |
| <ul id="searchresults"> |
| </ul> |
| </div> |
| </div> |
| |
| <!-- Apply ARIA attributes after the sidebar and the sidebar toggle button are added to the DOM --> |
| <script> |
| document.getElementById('sidebar-toggle').setAttribute('aria-expanded', sidebar === 'visible'); |
| document.getElementById('sidebar').setAttribute('aria-hidden', sidebar !== 'visible'); |
| Array.from(document.querySelectorAll('#sidebar a')).forEach(function(link) { |
| link.setAttribute('tabIndex', sidebar === 'visible' ? 0 : -1); |
| }); |
| </script> |
| |
| <div id="content" class="content"> |
| <main> |
| <h1 id="optimizations-the-speed-size-tradeoff"><a class="header" href="#optimizations-the-speed-size-tradeoff">Optimizations: the speed size tradeoff</a></h1> |
| <p>Everyone wants their program to be super fast and super small but it's usually |
| not possible to have both characteristics. This section discusses the |
| different optimization levels that <code>rustc</code> provides and how they affect the |
| execution time and binary size of a program.</p> |
| <h2 id="no-optimizations"><a class="header" href="#no-optimizations">No optimizations</a></h2> |
| <p>This is the default. When you call <code>cargo build</code> you use the development (AKA |
| <code>dev</code>) profile. This profile is optimized for debugging so it enables debug |
| information and does <em>not</em> enable any optimizations, i.e. it uses <code>-C opt-level=0</code>.</p> |
| <p>At least for bare metal development, debuginfo is zero cost in the sense that it |
| won't occupy space in Flash / ROM so we actually recommend that you enable |
| debuginfo in the release profile -- it is disabled by default. That will let you |
| use breakpoints when debugging release builds.</p> |
| <pre><code class="language-toml">[profile.release] |
| # symbols are nice and they don't increase the size on Flash |
| debug = true |
| </code></pre> |
| <p>No optimizations is great for debugging because stepping through the code feels |
| like you are executing the program statement by statement, plus you can <code>print</code> |
| stack variables and function arguments in GDB. When the code is optimized, trying |
| to print variables results in <code>$0 = &lt;value optimized out&gt;</code> being printed.</p> |
| <p>The biggest downside of the <code>dev</code> profile is that the resulting binary will be |
| huge and slow. The size is usually more of a problem because unoptimized |
| binaries can occupy dozens of KiB of Flash, which your target device may not |
| have -- the result: your unoptimized binary doesn't fit in your device!</p> |
| <p>Can we have smaller, debugger friendly binaries? Yes, there's a trick.</p> |
| <h3 id="optimizing-dependencies"><a class="header" href="#optimizing-dependencies">Optimizing dependencies</a></h3> |
| <p>There's a Cargo feature named <a href="https://doc.rust-lang.org/cargo/reference/profiles.html#overrides"><code>profile-overrides</code></a> that lets you |
| override the optimization level of dependencies. You can use that feature to |
| optimize all dependencies for size while keeping the top crate unoptimized and |
| debugger friendly.</p> |
| <p>Beware that generic code can sometimes be optimized alongside the crate where it |
| is instantiated, rather than the crate where it is defined. If you create an |
| instance of a generic struct in your application and find that it pulls in code |
| with a large footprint, it may be that increasing the optimization level of the |
| relevant dependencies has no effect.</p> |
| <p>Here's an example:</p> |
| <pre><code class="language-toml"># Cargo.toml |
| [package] |
| name = "app" |
| # .. |
| |
| [profile.dev.package."*"] # + |
| opt-level = "z" # + |
| </code></pre> |
| <p>Without the override:</p> |
| <pre><code class="language-text">$ cargo size --bin app -- -A |
| app : |
| section size addr |
| .vector_table 1024 0x8000000 |
| .text 9060 0x8000400 |
| .rodata 1708 0x8002780 |
| .data 0 0x20000000 |
| .bss 4 0x20000000 |
| </code></pre> |
| <p>With the override:</p> |
| <pre><code class="language-text">$ cargo size --bin app -- -A |
| app : |
| section size addr |
| .vector_table 1024 0x8000000 |
| .text 3490 0x8000400 |
| .rodata 1100 0x80011c0 |
| .data 0 0x20000000 |
| .bss 4 0x20000000 |
| </code></pre> |
| <p>That's a 6 KiB reduction in Flash usage without any loss in the debuggability of |
| the top crate. If you step into a dependency then you'll start seeing those |
| <code>&lt;value optimized out&gt;</code> messages again but it's usually the case that you want |
| to debug the top crate and not the dependencies. And if you <em>do</em> need to debug a |
| dependency then you can use the <code>profile-overrides</code> feature to exclude a |
| particular dependency from being optimized. See example below:</p> |
| <pre><code class="language-toml"># .. |
| |
| # don't optimize the `cortex-m-rt` crate |
| [profile.dev.package.cortex-m-rt] # + |
| opt-level = 0 # + |
| |
| # but do optimize all the other dependencies |
| [profile.dev.package."*"] |
| codegen-units = 1 # better optimizations |
| opt-level = "z" |
| </code></pre> |
| <p>Now the top crate and <code>cortex-m-rt</code> are debugger friendly!</p> |
| <h2 id="optimize-for-speed"><a class="header" href="#optimize-for-speed">Optimize for speed</a></h2> |
| <p>As of 2018-09-18 <code>rustc</code> supports three "optimize for speed" levels: <code>opt-level = 1</code>, <code>2</code> and <code>3</code>. When you run <code>cargo build --release</code> you are using the release |
| profile which defaults to <code>opt-level = 3</code>.</p> |
| <p>Both <code>opt-level = 2</code> and <code>3</code> optimize for speed at the expense of binary size, |
| but level <code>3</code> does more vectorization and inlining than level <code>2</code>. In |
| particular, you'll see that at <code>opt-level</code> equal to or greater than <code>2</code> LLVM will |
| unroll loops. Loop unrolling has a rather high cost in terms of Flash / ROM |
| (e.g. from 26 bytes to 194 for a loop that zeroes an array) but can also halve the |
| execution time given the right conditions (e.g. number of iterations is big |
| enough).</p> |
| <p>Currently there's no way to disable loop unrolling in <code>opt-level = 2</code> and <code>3</code> so |
| if you can't afford its cost you should optimize your program for size.</p> |
| <h2 id="optimize-for-size"><a class="header" href="#optimize-for-size">Optimize for size</a></h2> |
| <p>As of 2018-09-18 <code>rustc</code> supports two "optimize for size" levels: <code>opt-level = "s"</code> and <code>"z"</code>. These names were inherited from clang / LLVM and are not too |
| descriptive but <code>"z"</code> is meant to give the idea that it produces smaller |
| binaries than <code>"s"</code>.</p> |
| <p>If you want your release binaries to be optimized for size then change the |
| <code>profile.release.opt-level</code> setting in <code>Cargo.toml</code> as shown below.</p> |
| <pre><code class="language-toml">[profile.release] |
| # or "z" |
| opt-level = "s" |
| </code></pre> |
| <p>These two optimization levels greatly reduce LLVM's inline threshold, a metric |
| used to decide whether to inline a function or not. One of Rust's principles is |
| zero-cost abstractions; these abstractions tend to use a lot of newtypes and |
| small functions to hold invariants (e.g. functions that borrow an inner value |
| like <code>deref</code>, <code>as_ref</code>) so a low inline threshold can make LLVM miss |
| optimization opportunities (e.g. eliminate dead branches, inline calls to |
| closures).</p> |
| <p>When optimizing for size you may want to try increasing the inline threshold to |
| see if that has any effect on the binary size. The recommended way to change the |
| inline threshold is to append the <code>-C inline-threshold</code> flag to the other |
| rustflags in <code>.cargo/config.toml</code>.</p> |
| <pre><code class="language-toml"># .cargo/config.toml |
| # this assumes that you are using the cortex-m-quickstart template |
| [target.'cfg(all(target_arch = "arm", target_os = "none"))'] |
| rustflags = [ |
| # .. |
| "-C", "inline-threshold=123", # + |
| ] |
| </code></pre> |
| <p>What value to use? <a href="https://github.com/rust-lang/rust/blob/1.29.0/src/librustc_codegen_llvm/back/write.rs#L2105-L2122">As of 1.29.0 these are the inline thresholds that the |
| different optimization levels use</a>:</p> |
| <ul> |
| <li><code>opt-level = 3</code> uses 275</li> |
| <li><code>opt-level = 2</code> uses 225</li> |
| <li><code>opt-level = "s"</code> uses 75</li> |
| <li><code>opt-level = "z"</code> uses 25</li> |
| </ul> |
| <p>You should try <code>225</code> and <code>275</code> when optimizing for size.</p> |
| |
| </main> |
| |
| <nav class="nav-wrapper" aria-label="Page navigation"> |
| <!-- Mobile navigation buttons --> |
| <a rel="prev" href="../unsorted/index.html" class="mobile-nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left"> |
| <i class="fa fa-angle-left"></i> |
| </a> |
| |
| <a rel="next prefetch" href="../unsorted/math.html" class="mobile-nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right"> |
| <i class="fa fa-angle-right"></i> |
| </a> |
| |
| <div style="clear: both"></div> |
| </nav> |
| </div> |
| </div> |
| |
| <nav class="nav-wide-wrapper" aria-label="Page navigation"> |
| <a rel="prev" href="../unsorted/index.html" class="nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left"> |
| <i class="fa fa-angle-left"></i> |
| </a> |
| |
| <a rel="next prefetch" href="../unsorted/math.html" class="nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right"> |
| <i class="fa fa-angle-right"></i> |
| </a> |
| </nav> |
| |
| </div> |
| |
| |
| |
| |
| <script> |
| // NOTE(review): global flag presumably read by book.js (loaded below) to enable |
| // copy buttons on code blocks; confirm against book.js. |
| window.playground_copyable = true; |
| </script> |
| |
| |
| <script src="../elasticlunr.min.js"></script> |
| <script src="../mark.min.js"></script> |
| <script src="../searcher.js"></script> |
| |
| <script src="../clipboard.min.js"></script> |
| <script src="../highlight.js"></script> |
| <script src="../book.js"></script> |
| |
| <!-- Custom JS scripts --> |
| |
| |
| </div> |
| </body> |
| </html> |