| <!DOCTYPE HTML> |
| <html lang="en" class="light sidebar-visible" dir="ltr"> |
| <head> |
| <!-- Book generated using mdBook --> |
| <meta charset="UTF-8"> |
| <title>Search - Rust Compiler Development Guide</title> |
| |
| |
| <!-- Custom HTML head --> |
| |
| <meta name="description" content="A guide to developing the Rust compiler (rustc)"> |
| <meta name="viewport" content="width=device-width, initial-scale=1"> |
| <meta name="theme-color" content="#ffffff"> |
| |
| <link rel="icon" href="../favicon.svg"> |
| <link rel="shortcut icon" href="../favicon.png"> |
| <link rel="stylesheet" href="../css/variables.css"> |
| <link rel="stylesheet" href="../css/general.css"> |
| <link rel="stylesheet" href="../css/chrome.css"> |
| <link rel="stylesheet" href="../css/print.css" media="print"> |
| |
| <!-- Fonts --> |
| <link rel="stylesheet" href="../FontAwesome/css/font-awesome.css"> |
| <link rel="stylesheet" href="../fonts/fonts.css"> |
| |
| <!-- Highlight.js Stylesheets --> |
| <link rel="stylesheet" id="highlight-css" href="../highlight.css"> |
| <link rel="stylesheet" id="tomorrow-night-css" href="../tomorrow-night.css"> |
| <link rel="stylesheet" id="ayu-highlight-css" href="../ayu-highlight.css"> |
| |
| <!-- Custom theme stylesheets --> |
| <link rel="stylesheet" href="../pagetoc.css"> |
| |
| |
| <!-- Provide site root and default themes to javascript --> |
| <script> |
| const path_to_root = "../"; |
| const default_light_theme = "light"; |
| const default_dark_theme = "navy"; |
| </script> |
| <!-- Start loading toc.js asap --> |
| <script src="../toc.js"></script> |
| </head> |
| <body> |
| <div id="body-container"> |
| <!-- Work around some values being stored in localStorage wrapped in quotes --> |
| <script> |
| try { |
| let theme = localStorage.getItem('mdbook-theme'); |
| let sidebar = localStorage.getItem('mdbook-sidebar'); |
| |
| if (theme.startsWith('"') && theme.endsWith('"')) { |
| localStorage.setItem('mdbook-theme', theme.slice(1, theme.length - 1)); |
| } |
| |
| if (sidebar.startsWith('"') && sidebar.endsWith('"')) { |
| localStorage.setItem('mdbook-sidebar', sidebar.slice(1, sidebar.length - 1)); |
| } |
| } catch (e) { } |
| </script> |
| |
| <!-- Set the theme before any content is loaded, prevents flash --> |
| <script> |
| const default_theme = window.matchMedia("(prefers-color-scheme: dark)").matches ? default_dark_theme : default_light_theme; |
| let theme; |
| try { theme = localStorage.getItem('mdbook-theme'); } catch(e) { } |
| if (theme === null || theme === undefined) { theme = default_theme; } |
| const html = document.documentElement; |
| html.classList.remove('light') |
| html.classList.add(theme); |
| html.classList.add("js"); |
| </script> |
| |
| <input type="checkbox" id="sidebar-toggle-anchor" class="hidden"> |
| |
| <!-- Hide / unhide sidebar before it is displayed --> |
| <script> |
| let sidebar = null; |
| const sidebar_toggle = document.getElementById("sidebar-toggle-anchor"); |
| if (document.body.clientWidth >= 1080) { |
| try { sidebar = localStorage.getItem('mdbook-sidebar'); } catch(e) { } |
| sidebar = sidebar || 'visible'; |
| } else { |
| sidebar = 'hidden'; |
| } |
| sidebar_toggle.checked = sidebar === 'visible'; |
| html.classList.remove('sidebar-visible'); |
| html.classList.add("sidebar-" + sidebar); |
| </script> |
| |
| <nav id="sidebar" class="sidebar" aria-label="Table of contents"> |
| <!-- populated by js --> |
| <mdbook-sidebar-scrollbox class="sidebar-scrollbox"></mdbook-sidebar-scrollbox> |
| <noscript> |
| <iframe class="sidebar-iframe-outer" src="../toc.html"></iframe> |
| </noscript> |
| <div id="sidebar-resize-handle" class="sidebar-resize-handle"> |
| <div class="sidebar-resize-indicator"></div> |
| </div> |
| </nav> |
| |
| <div id="page-wrapper" class="page-wrapper"> |
| |
| <div class="page"> |
| <div id="menu-bar-hover-placeholder"></div> |
| <div id="menu-bar" class="menu-bar sticky"> |
| <div class="left-buttons"> |
| <label id="sidebar-toggle" class="icon-button" for="sidebar-toggle-anchor" title="Toggle Table of Contents" aria-label="Toggle Table of Contents" aria-controls="sidebar"> |
| <i class="fa fa-bars"></i> |
| </label> |
| <button id="theme-toggle" class="icon-button" type="button" title="Change theme" aria-label="Change theme" aria-haspopup="true" aria-expanded="false" aria-controls="theme-list"> |
| <i class="fa fa-paint-brush"></i> |
| </button> |
| <ul id="theme-list" class="theme-popup" aria-label="Themes" role="menu"> |
| <li role="none"><button role="menuitem" class="theme" id="default_theme">Auto</button></li> |
| <li role="none"><button role="menuitem" class="theme" id="light">Light</button></li> |
| <li role="none"><button role="menuitem" class="theme" id="rust">Rust</button></li> |
| <li role="none"><button role="menuitem" class="theme" id="coal">Coal</button></li> |
| <li role="none"><button role="menuitem" class="theme" id="navy">Navy</button></li> |
| <li role="none"><button role="menuitem" class="theme" id="ayu">Ayu</button></li> |
| </ul> |
| <button id="search-toggle" class="icon-button" type="button" title="Search. (Shortkey: s)" aria-label="Toggle Searchbar" aria-expanded="false" aria-keyshortcuts="S" aria-controls="searchbar"> |
| <i class="fa fa-search"></i> |
| </button> |
| </div> |
| |
| <h1 class="menu-title">Rust Compiler Development Guide</h1> |
| |
| <div class="right-buttons"> |
| <a href="../print.html" title="Print this book" aria-label="Print this book"> |
| <i id="print-button" class="fa fa-print"></i> |
| </a> |
| <a href="https://github.com/rust-lang/rustc-dev-guide" title="Git repository" aria-label="Git repository"> |
| <i id="git-repository-button" class="fa fa-github"></i> |
| </a> |
| <a href="https://github.com/rust-lang/rustc-dev-guide/edit/master/src/rustdoc-internals/search.md" title="Suggest an edit" aria-label="Suggest an edit"> |
| <i id="git-edit-button" class="fa fa-edit"></i> |
| </a> |
| |
| </div> |
| </div> |
| |
| <div id="search-wrapper" class="hidden"> |
| <form id="searchbar-outer" class="searchbar-outer"> |
| <input type="search" id="searchbar" name="searchbar" placeholder="Search this book ..." aria-controls="searchresults-outer" aria-describedby="searchresults-header"> |
| </form> |
| <div id="searchresults-outer" class="searchresults-outer hidden"> |
| <div id="searchresults-header" class="searchresults-header"></div> |
| <ul id="searchresults"> |
| </ul> |
| </div> |
| </div> |
| |
| <!-- Apply ARIA attributes after the sidebar and the sidebar toggle button are added to the DOM --> |
| <script> |
| document.getElementById('sidebar-toggle').setAttribute('aria-expanded', sidebar === 'visible'); |
| document.getElementById('sidebar').setAttribute('aria-hidden', sidebar !== 'visible'); |
| Array.from(document.querySelectorAll('#sidebar a')).forEach(function(link) { |
| link.setAttribute('tabIndex', sidebar === 'visible' ? 0 : -1); |
| }); |
| </script> |
| |
| <div id="content" class="content"> |
| <main> |
| <h1 id="rustdoc-search"><a class="header" href="#rustdoc-search">Rustdoc search</a></h1> |
| <p>Rustdoc Search is two programs: <code>search_index.rs</code> |
| and <code>search.js</code>. The first generates a nasty JSON |
| file with a full list of items and function signatures |
| in the crates in the doc bundle, and the second reads |
| it, turns it into some in-memory structures, and |
| scans them linearly to search.</p> |
| <h2 id="search-index-format"><a class="header" href="#search-index-format">Search index format</a></h2> |
| <p><code>search.js</code> calls this Raw, because it turns it into |
| a more normal object tree after loading it. |
| For space savings, it's also written without newlines or spaces.</p> |
| <pre><code class="language-json">[ |
| [ "crate_name", { |
| // name |
| "n": ["function_name", "Data"], |
| // type |
| "t": "HF", |
| // parent module |
| "q": [[0, "crate_name"]], |
| // parent type |
| "i": [2, 0], |
| // type dictionary |
| "p": [[1, "i32"], [1, "str"], [5, "Data", 0]], |
| // function signature |
| "f": "{{gb}{d}}`", // [[3, 1], [2]] |
| // impl disambiguator |
| "b": [], |
| // deprecated flag |
| "c": "OjAAAAAAAAA=", // empty bitmap |
| // empty description flag |
| "e": "OjAAAAAAAAA=", // empty bitmap |
| // aliases |
| "a": [["get_name", 0]], |
| // description shards |
| "D": "g", // 3 |
| // inlined re-exports |
| "r": [], |
| }] |
| ] |
| </code></pre> |
| <p><a href="https://github.com/rust-lang/rust/blob/2f92f050e83bf3312ce4ba73c31fe843ad3cbc60/src/librustdoc/html/static/js/rustdoc.d.ts#L344-L390"><code>src/librustdoc/html/static/js/rustdoc.d.ts</code></a> |
| defines an actual schema in a TypeScript <code>type</code>.</p> |
| <div class="table-wrapper"><table><thead><tr><th>Key</th><th>Name</th><th>Description</th></tr></thead><tbody> |
| <tr><td><code>n</code></td><td>Names</td><td>Item names</td></tr> |
| <tr><td><code>t</code></td><td>Item Type</td><td>One-char item type code</td></tr> |
| <tr><td><code>q</code></td><td>Parent module</td><td><code>Map<index, path></code></td></tr> |
| <tr><td><code>i</code></td><td>Parent type</td><td>list of indexes</td></tr> |
| <tr><td><code>f</code></td><td>Function signature</td><td><a href="#i-f-and-p">encoded</a></td></tr> |
| <tr><td><code>b</code></td><td>Impl disambiguator</td><td><code>Map<index, string></code></td></tr> |
| <tr><td><code>c</code></td><td>Deprecation flag</td><td><a href="#roaring-bitmaps">roaring bitmap</a></td></tr> |
| <tr><td><code>e</code></td><td>Description is empty</td><td><a href="#roaring-bitmaps">roaring bitmap</a></td></tr> |
| <tr><td><code>p</code></td><td>Type dictionary</td><td><code>[[item type, path]]</code></td></tr> |
| <tr><td><code>a</code></td><td>Alias</td><td><code>Map<string, index></code></td></tr> |
| <tr><td><code>D</code></td><td>description shards</td><td><a href="#how-descriptions-are-stored">encoded</a></td></tr> |
| </tbody></table> |
| </div> |
| <p>The above index defines a crate called <code>crate_name</code> |
| with a free function called <code>function_name</code> and a struct called <code>Data</code>, |
| with the type signature <code>Data, i32 -> str</code>, |
| and an alias, <code>get_name</code>, that equivalently refers to <code>function_name</code>.</p> |
| <p>The search index needs to fit the needs of the <code>rustdoc</code> compiler, |
| the <code>search.js</code> frontend, |
| and also be compact and fast to decode. |
| It makes a lot of compromises:</p> |
| <ul> |
| <li>The <code>rustdoc</code> compiler runs on one crate at a time, |
| so each crate has an essentially separate search index. |
| It <a href="https://github.com/rust-lang/rust/blob/79b710c13968a1a48d94431d024d2b1677940866/src/librustdoc/html/render/write_shared.rs#L151-L164">merges</a> them by having each crate on one line |
| and looking at the first quoted string.</li> |
| <li>Names in the search index are given |
| in their original case and with underscores. |
| When the search index is loaded, |
| <code>search.js</code> stores the original names for display, |
| but also folds them to lowercase and strips underscores for search. |
| You'll see them called <code>normalized</code>.</li> |
| <li>The <code>f</code> array stores types as offsets into the <code>p</code> array. |
| These types might actually be from another crate, |
| so <code>search.js</code> has to turn the numbers into names and then |
| back into numbers to deduplicate them if multiple crates in the |
| same index mention the same types.</li> |
| <li>It's a JSON file, but not designed to be human-readable. |
| Browsers already include an optimized JSON decoder, |
| so this saves on <code>search.js</code> code and performs better for small crates, |
| but instead of using objects like normal JSON formats do, |
| it tries to put data of the same type next to each other |
| so that the sliding window used by <a href="https://en.wikipedia.org/wiki/Deflate">DEFLATE</a> can find redundancies. |
| Where <code>search.js</code> does its own compression, |
| it's designed to save memory when the file is finally loaded, |
| not just size on disk or network transfer.</li> |
| </ul> |
| <h3 id="parallel-arrays-and-indexed-maps"><a class="header" href="#parallel-arrays-and-indexed-maps">Parallel arrays and indexed maps</a></h3> |
| <p>Abstractly, Rustdoc Search data is a table, stored in column-major form. |
| Most data in the index represents a set of parallel arrays |
| (the "columns") which refer to the same data if they're at the same position.</p> |
| <p>For example, |
| the above search index can be turned into this table:</p> |
| <div class="table-wrapper"><table><thead><tr><th></th><th>n</th><th>t</th><th><a href="#how-descriptions-are-stored">d</a></th><th>q</th><th>i</th><th>f</th><th>b</th><th>c</th></tr></thead><tbody> |
| <tr><td>0</td><td><code>crate_name</code></td><td><code>D</code></td><td>Documentation</td><td>NULL</td><td>0</td><td>NULL</td><td>NULL</td><td>0</td></tr> |
| <tr><td>1</td><td><code>function_name</code></td><td><code>H</code></td><td>This function gets the name of an integer with Data</td><td><code>crate_name</code></td><td>2</td><td><code>{{gb}{d}}</code></td><td>NULL</td><td>0</td></tr> |
| <tr><td>2</td><td><code>Data</code></td><td><code>F</code></td><td>The data struct</td><td><code>crate_name</code></td><td>0</td><td><code>`</code></td><td>NULL</td><td>0</td></tr> |
| </tbody></table> |
| </div> |
| <p>The crate row is implied in most columns, since its type is known (it's a crate), |
| it can't have a parent (crates form the root of the module tree), |
| its name is specified as the map key, |
| and function-specific data like the impl disambiguator can't apply either. |
| However, it can still have a description and it can still be deprecated. |
| The crate, therefore, has a primary key of <code>0</code>.</p> |
| <p>The above code doesn't use <code>c</code>, which holds deprecated indices, |
| or <code>b</code>, which maps indices to strings. |
| If <code>crate_name::function_name</code> used both, it might look like this.</p> |
| <pre><code class="language-json"> "b": [[0, "impl-Foo-for-Bar"]], |
| "c": "OjAAAAEAAAAAAAIAEAAAABUAbgZYCQ==", |
| </code></pre> |
| <p>This attaches a disambiguator to index 1 and marks it deprecated.</p> |
| <p>The advantage of this layout is that these APIs often have implicit structure |
| that DEFLATE can take advantage of, |
| but that rustdoc can't assume. |
| Like how names are usually CamelCase or snake_case, |
| but descriptions aren't. |
| It also makes it easier to use a sparse data for things like boolean flags.</p> |
| <p><code>q</code> is a Map from <em>the first applicable</em> ID to a parent module path. |
| This is a weird trick, but it makes more sense in pseudo-code:</p> |
| <pre><pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)] |
| </span><span class="boring">fn main() { |
| </span>let mut parent_module = ""; |
| for (i, entry) in search_index.iter().enumerate() { |
| if q.contains(i) { |
| parent_module = q.get(i); |
| } |
| // ... do other stuff with `entry` ... |
| } |
| <span class="boring">}</span></code></pre></pre> |
| <p>This is valid because everything has a parent module |
| (even if it's just the crate itself), |
| and is easy to assemble because the rustdoc generator sorts by path |
| before serializing. |
| Doing this allows rustdoc to not only make the search index smaller, |
| but reuse the same string representing the parent path across multiple in-memory items.</p> |
| <h3 id="representing-sparse-columns"><a class="header" href="#representing-sparse-columns">Representing sparse columns</a></h3> |
| <h4 id="vlq-hex"><a class="header" href="#vlq-hex">VLQ Hex</a></h4> |
| <p>This format is, as far as I know, used nowhere other than rustdoc. |
| It follows this grammar:</p> |
| <pre><code class="language-ebnf">VLQHex = { VHItem | VHBackref } |
| VHItem = VHNumber | ( '{', {VHItem}, '}' ) |
| VHNumber = { '@' | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L' | 'M' | 'N' | 'O' }, ( '`' | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k ' | 'l' | 'm' | 'n' | 'o' ) |
| VHBackref = ( '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' | ':' | ';' | '<' | '=' | '>' | '?' ) |
| </code></pre> |
| <p>A VHNumber is a variable-length, self-terminating base16 number |
| (terminated because the last hexit is lowercase while all others are uppercase). |
| The sign bit is represented using <a href="https://en.wikipedia.org/wiki/Variable-length_quantity#Zigzag_encoding">zig-zag encoding</a>.</p> |
| <p>This alphabet is chosen because the characters can be turned into hexits by |
| masking off the last four bits of the ASCII encoding.</p> |
| <p>A major feature of this encoding, as with all of the "compression" done in rustdoc, |
| is that it can remain in its compressed format <em>even in memory at runtime</em>. |
| This is why <code>HBackref</code> is only used at the top level, |
| and why we don't just use <a href="https://en.wikipedia.org/wiki/Deflate">Flate</a> for everything: the decoder in search.js |
| will reuse the entire decoded object whenever a backref is seen, |
| saving decode work and memory.</p> |
| <h4 id="roaring-bitmaps"><a class="header" href="#roaring-bitmaps">Roaring Bitmaps</a></h4> |
| <p>Flag-style data, such as deprecation and empty descriptions, |
| are stored using the <a href="https://github.com/RoaringBitmap/RoaringFormatSpec">standard Roaring Bitmap serialization format with runs</a>. |
| The data is then base64 encoded when writing it.</p> |
| <p>As a brief overview: a roaring bitmap is a chunked array of bits, |
| described in <a href="https://arxiv.org/pdf/1603.06549.pdf">this paper</a>. |
| A chunk can either be a list of integers, a bitfield, or a list of runs. |
| In any case, the search engine has to base64 decode it, |
| and read the chunk index itself, |
| but the payload data stays as-is.</p> |
| <p>All roaring bitmaps in rustdoc currently store a flag for each item index. |
| The crate is item 0, all others start at 1.</p> |
| <h3 id="how-descriptions-are-stored"><a class="header" href="#how-descriptions-are-stored">How descriptions are stored</a></h3> |
| <p>The largest amount of data, |
| and the main thing Rustdoc Search deals with that isn't |
| actually used for searching, is descriptions. |
| In a SERP table, this is what appears on the rightmost column.</p> |
| <blockquote> |
| <div class="table-wrapper"><table><thead><tr><th>item type</th><th>item path</th><th><em><strong>description</strong></em> (this part)</th></tr></thead><tbody> |
| <tr><td>function</td><td>my_crate::my_function</td><td>This function gets the name of an integer with Data</td></tr> |
| </tbody></table> |
| </div></blockquote> |
| <p>When someone runs a search in rustdoc for the first time, their browser will |
| work through a "sandwich workload" of three steps:</p> |
| <ol> |
| <li>Download the search-index.js and search.js files (a network bottleneck).</li> |
| <li>Perform the actual search (a CPU and memory bandwidth bottleneck).</li> |
| <li>Download the description data (another network bottleneck).</li> |
| </ol> |
| <p>Reducing the amount of data downloaded here will almost always increase latency, |
| by delaying the decision of what to download behind other work and/or adding |
| data dependencies where something can't be downloaded without first downloading |
| something else. In this case, we can't start downloading descriptions until |
| after the search is done, because that's what allows it to decide <em>which</em> |
| descriptions to download (it needs to sort the results then truncate to 200).</p> |
| <p>To do this, two columns are stored in the search index, building on both |
| Roaring Bitmaps and on VLQ Hex.</p> |
| <ul> |
| <li><code>e</code> is an index of <strong>e</strong>mpty descriptions. It's a <a href="#roaring-bitmaps">roaring bitmap</a> of |
| each item (the crate itself is item 0, the rest start at 1).</li> |
| <li><code>D</code> is a shard list, stored in <a href="#vlq-hex">VLQ hex</a> as flat list of integers. |
| Each integer gives you the number of descriptions in the shard. |
| As the decoder walks the index, it checks if the description is empty. |
| if it's not, then it's in the "current" shard. When all items are |
| exhausted, it goes on to the next shard.</li> |
| </ul> |
| <p>Inside each shard is a newline-delimited list of descriptions, |
| wrapped in a JSONP-style function call.</p> |
| <h3 id="i-f-and-p"><a class="header" href="#i-f-and-p"><code>i</code>, <code>f</code>, and <code>p</code></a></h3> |
| <p><code>i</code> and <code>f</code> both index into <code>p</code>, the array of parent items.</p> |
| <p><code>i</code> is just a one-indexed number |
| (not zero-indexed because <code>0</code> is used for items that have no parent item). |
| It's different from <code>q</code> because <code>q</code> represents the parent <em>module or crate</em>, |
| which everything has, |
| while <code>i</code>/<code>q</code> are used for <em>type and trait-associated items</em> like methods.</p> |
| <p><code>f</code>, the function signatures, use a <a href="#vlq-hex">VLQ hex</a> tree. |
| A number is either a one-indexed reference into <code>p</code>, |
| a negative number representing a generic, |
| or zero for null.</p> |
| <p>(the internal object representation also uses negative numbers, |
| even after decoding, |
| to represent generics).</p> |
| <p>For example, <code>{{gb}{d}}</code> is equivalent to the json <code>[[3, 1], [2]]</code>. |
| Because of zigzag encoding, <code>`</code> is +0, <code>a</code> is -0 (which is not used), |
| <code>b</code> is +1, and <code>c</code> is -1.</p> |
| <h2 id="searching-by-name"><a class="header" href="#searching-by-name">Searching by name</a></h2> |
| <p>Searching by name works by looping through the search index |
| and running these functions on each:</p> |
| <ul> |
| <li><a href="https://github.com/rust-lang/rust/blob/79b710c13968a1a48d94431d024d2b1677940866/src/librustdoc/html/static/js/search.js#L137"><code>editDistance</code></a> is always used to determine a match |
| (unless quotes are specified, which would use simple equality instead). |
| It computes the number of swaps, inserts, and removes needed to turn |
| the query name into the entry name. |
| For example, <code>foo</code> has zero distance from itself, |
| but a distance of 1 from <code>ofo</code> (one swap) and <code>foob</code> (one insert). |
| It is checked against an heuristic threshold, and then, |
| if it is within that threshold, the distance is stored for ranking.</li> |
| <li><a href="https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/indexOf"><code>String.prototype.indexOf</code></a> is always used to determine a match. |
| If it returns anything other than -1, the result is added, |
| even if <code>editDistance</code> exceeds its threshold, |
| and the index is stored for ranking.</li> |
| <li><a href="https://github.com/rust-lang/rust/blob/79b710c13968a1a48d94431d024d2b1677940866/src/librustdoc/html/static/js/search.js#L1814"><code>checkPath</code></a> is used if, and only if, a parent path is specified |
| in the query. For example, <code>vec</code> has no parent path, but <code>vec::vec</code> does. |
| Within checkPath, editDistance and indexOf are used, |
| and the path query has its own heuristic threshold, too. |
| If it's not within the threshold, the entry is rejected, |
| even if the first two pass. |
| If it's within the threshold, the path distance is stored |
| for ranking.</li> |
| <li><a href="https://github.com/rust-lang/rust/blob/79b710c13968a1a48d94431d024d2b1677940866/src/librustdoc/html/static/js/search.js#L1787"><code>checkType</code></a> is used only if there's a type filter, |
| like the struct in <code>struct:vec</code>. If it fails, |
| the entry is rejected.</li> |
| </ul> |
| <p>If all four criteria pass |
| (plus the crate filter, which isn't technically part of the query), |
| the results are sorted by <a href="https://github.com/rust-lang/rust/blob/79b710c13968a1a48d94431d024d2b1677940866/src/librustdoc/html/static/js/search.js#L1229"><code>sortResults</code></a>.</p> |
| <h2 id="searching-by-type"><a class="header" href="#searching-by-type">Searching by type</a></h2> |
| <p>Searching by type can be divided into two phases, |
| and the second phase has two sub-phases.</p> |
| <ul> |
| <li>Turn names in the query into numbers.</li> |
| <li>Loop over each entry in the search index: |
| <ul> |
| <li>Quick rejection using a bloom filter.</li> |
| <li>Slow rejection using a recursive type unification algorithm.</li> |
| </ul> |
| </li> |
| </ul> |
| <p>In the names->numbers phase, if the query has only one name in it, |
| the editDistance function is used to find a near match if the exact match fails, |
| but if there's multiple items in the query, |
| non-matching items are treated as generics instead. |
| This means <code>hahsmap</code> will match hashmap on its own, but <code>hahsmap, u32</code> |
| is going to match the same things <code>T, u32</code> matches |
| (though rustdoc will detect this particular problem and warn about it).</p> |
| <p>Then, when actually looping over each item, |
| the bloom filter will probably reject entries that don't have every |
| type mentioned in the query. |
| For example, the bloom query allows a query of <code>i32 -> u32</code> to match |
| a function with the type <code>i32, u32 -> bool</code>, |
| but unification will reject it later.</p> |
| <p>The unification filter ensures that:</p> |
| <ul> |
| <li>Bag semantics are respected. If you query says <code>i32, i32</code>, |
| then the function has to mention <em>two</em> i32s, not just one.</li> |
| <li>Nesting semantics are respected. If your query says <code>vec<option></code>, |
| then <code>vec<option<i32>></code> is fine, but <code>option<vec<i32>></code> <em>is not</em> a match.</li> |
| <li>The division between return type and parameter is respected. |
| <code>i32 -> u32</code> and <code>u32 -> i32</code> are completely different.</li> |
| </ul> |
| <p>The bloom filter checks none of these things, |
| and, on top of that, can have false positives. |
| But it's fast and uses very little memory, so the bloom filter helps.</p> |
| <h2 id="re-exports"><a class="header" href="#re-exports">Re-exports</a></h2> |
| <p><a href="https://doc.rust-lang.org/nightly/rustdoc/write-documentation/re-exports.html">Re-export inlining</a> allows the same item to be found by multiple names. |
| Search supports this by giving the same item multiple entries and tracking a canonical path |
| for any items where that differs from the given path.</p> |
| <p>For example, this sample index has a single struct exported from two paths:</p> |
| <pre><code class="language-json">[ |
| [ "crate_name", { |
| "doc": "Documentation", |
| "n": ["Data", "Data"], |
| "t": "FF", |
| "d": ["The data struct", "The data struct"], |
| "q": [[0, "crate_name"], [1, "crate_name::submodule"]], |
| "i": [0, 0], |
| "p": [], |
| "f": "``", |
| "b": [], |
| "c": [], |
| "a": [], |
| "r": [[0, 1]], |
| }] |
| ] |
| </code></pre> |
| <p>The important part of this example is the <code>r</code> array, |
| which indicates that path entry 1 in the <code>q</code> array is |
| the canonical path for item 0. |
| That is, <code>crate_name::Data</code> has a canonical path of <code>crate_name::submodule::Data</code>.</p> |
| <p>This might sound like a strange design, since it has the duplicate data. |
| It's done that way because inlining can happen across crates, |
| which are compiled separately and might not all be present in the docs.</p> |
| <pre><code class="language-json">[ |
| [ "crate_name", ... ], |
| [ "crate_name_2", { "q": [[0, "crate_name::submodule"], [5, "core::option"]], ... }] |
| ] |
| </code></pre> |
| <p>In the above example, a canonical path actually comes from a dependency, |
| and another one comes from an inlined standard library item: |
| the canonical path isn't even in the index! |
| The canonical path might also be private. |
| In either case, it's never shown to the user, and is only used for deduplication.</p> |
| <p>Associated types, like methods, store them differently. |
| These types are connected with an entry in <code>p</code> (their "parent") |
| and each one has an optional third tuple element:</p> |
| <pre><code>"p": [[5, "Data", 0, 1]] |
| </code></pre> |
| <p>That's:</p> |
| <ul> |
| <li>5: It's a struct</li> |
| <li>"Data": Its name</li> |
| <li>0: Its display path, "crate_name"</li> |
| <li>1: Its canonical path, "crate_name::submodule"</li> |
| </ul> |
| <p>In both cases, the canonical path might not be public at all, |
| or it might be from another crate that isn't in the docs, |
| so it's never shown to the user, but is used for deduplication.</p> |
| <h2 id="testing-the-search-engine"><a class="header" href="#testing-the-search-engine">Testing the search engine</a></h2> |
| <p>While the generated UI is tested using <code>rustdoc-gui</code> tests, the |
| primary way the search engine is tested is the <code>rustdoc-js</code> and |
| <code>rustdoc-js-std</code> tests. They run in NodeJS.</p> |
| <p>A <code>rustdoc-js</code> test has a <code>.rs</code> and <code>.js</code> file, with the same name. |
| The <code>.rs</code> file specifies the hypothetical library crate to run |
| the searches on (make sure you mark anything you need to find as <code>pub</code>). |
| The <code>.js</code> file specifies the actual searches. |
| The <code>rustdoc-js-std</code> tests are the same, but don't require an <code>.rs</code> |
| file, since they use the standard library.</p> |
| <p>The <code>.js</code> file is like a module (except the loader takes care of |
| <code>exports</code> for you). It uses these variables:</p> |
| <div class="table-wrapper"><table><thead><tr><th>Name</th><th>Type</th><th>Description</th></tr></thead><tbody> |
| <tr><td><code>FILTER_CRATE</code></td><td><code>string</code></td><td>Only include results from the given crate. In the GUI, this is the "Results in <kbd>crate</kbd>" drop-down menu.</td></tr> |
| <tr><td><code>EXPECTED</code></td><td><code>[ResultsTable]|ResultsTable</code></td><td>List of tests to run, specifying what the hypothetical user types into the search box and sees in the tabs</td></tr> |
| <tr><td><code>PARSED</code></td><td><code>[ParsedQuery]|ParsedQuery</code></td><td>List of parser tests to run, without running an actual search</td></tr> |
| </tbody></table> |
| </div> |
| <p><code>FILTER_CRATE</code> can be left out (equivalent to searching "all crates"), but you |
| have to specify <code>EXPECTED</code> or <code>PARSED</code>.</p> |
| <p>By default, the test fails if any of the results specified in the test case are |
| not found after running the search, or if the results found after running the |
| search don't appear in the same order that they do in the test. |
| The actual search results may, however, include results that aren't in the test. |
| To override this, specify any of the following magic comments. |
| Put them on their own line, without indenting.</p> |
| <ul> |
| <li><code>// exact-check</code>: If search results appear that aren't part of the test case, |
| then fail.</li> |
| <li><code>// ignore-order</code>: Allow search results to appear in any order.</li> |
| <li><code>// should-fail</code>: Used to write negative tests.</li> |
| </ul> |
| <p>Standard library tests usually shouldn't specify <code>// exact-check</code>, since we |
| want the libs team to be able to add new items without causing unrelated |
| tests to fail, but standalone tests will use it more often.</p> |
| <p>The <code>ResultsTable</code> and <code>ParsedQuery</code> types are specified in |
| <a href="https://github.com/rust-lang/rust/blob/master/src/librustdoc/html/static/js/rustdoc.d.ts"><code>rustdoc.d.ts</code></a>.</p> |
| <p>For example, imagine we needed to fix a bug where a function named |
| <code>constructor</code> couldn't be found. To do this, write two files:</p> |
| <pre><pre class="playground"><code class="language-rust"><span class="boring">#![allow(unused)] |
| </span><span class="boring">fn main() { |
| </span>// tests/rustdoc-js/constructor_search.rs |
| // The test case needs to find this result. |
| pub fn constructor(_input: &str) -> i32 { 1 } |
| <span class="boring">}</span></code></pre></pre> |
| <pre><code class="language-js">// tests/rustdoc-js/constructor_search.js |
| // exact-check |
| // Since this test runs against its own crate, |
| // new items should not appear in the search results. |
| const EXPECTED = [ |
| // This first test targets name-based search. |
| { |
| query: "constructor", |
| others: [ |
| { path: "constructor_search", name: "constructor" }, |
| ], |
| in_args: [], |
| returned: [], |
| }, |
| // This test targets the second tab. |
| { |
| query: "str", |
| others: [], |
| in_args: [ |
| { path: "constructor_search", name: "constructor" }, |
| ], |
| returned: [], |
| }, |
| // This test targets the third tab. |
| { |
| query: "i32", |
| others: [], |
| in_args: [], |
| returned: [ |
| { path: "constructor_search", name: "constructor" }, |
| ], |
| }, |
| // This test targets advanced type-driven search. |
| { |
| query: "str -> i32", |
| others: [ |
| { path: "constructor_search", name: "constructor" }, |
| ], |
| in_args: [], |
| returned: [], |
| }, |
| ] |
| </code></pre> |
| |
| </main> |
| |
| <nav class="nav-wrapper" aria-label="Page navigation"> |
| <!-- Mobile navigation buttons --> |
| <a rel="prev" href="../rustdoc-internals.html" class="mobile-nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left"> |
| <i class="fa fa-angle-left"></i> |
| </a> |
| |
| <a rel="next prefetch" href="../rustdoc-internals/rustdoc-test-suite.html" class="mobile-nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right"> |
| <i class="fa fa-angle-right"></i> |
| </a> |
| |
| <div style="clear: both"></div> |
| </nav> |
| </div> |
| </div> |
| |
| <nav class="nav-wide-wrapper" aria-label="Page navigation"> |
| <a rel="prev" href="../rustdoc-internals.html" class="nav-chapters previous" title="Previous chapter" aria-label="Previous chapter" aria-keyshortcuts="Left"> |
| <i class="fa fa-angle-left"></i> |
| </a> |
| |
| <a rel="next prefetch" href="../rustdoc-internals/rustdoc-test-suite.html" class="nav-chapters next" title="Next chapter" aria-label="Next chapter" aria-keyshortcuts="Right"> |
| <i class="fa fa-angle-right"></i> |
| </a> |
| </nav> |
| |
| </div> |
| |
| |
| |
| |
| <script> |
| window.playground_copyable = true; |
| </script> |
| |
| |
| <script src="../elasticlunr.min.js"></script> |
| <script src="../mark.min.js"></script> |
| <script src="../searcher.js"></script> |
| |
| <script src="../clipboard.min.js"></script> |
| <script src="../highlight.js"></script> |
| <script src="../book.js"></script> |
| |
| <!-- Custom JS scripts --> |
| <script src="../mermaid.min.js"></script> |
| <script src="../mermaid-init.js"></script> |
| <script src="../pagetoc.js"></script> |
| |
| |
| </div> |
| </body> |
| </html> |