| //! Support for translating links to the standard library. |
| |
| use crate::{Diagnostics, bug, warn_or_err}; |
| use anyhow::{Result, bail}; |
| use mdbook::BookItem; |
| use mdbook::book::{Book, Chapter}; |
| use once_cell::sync::Lazy; |
| use pulldown_cmark::{BrokenLink, CowStr, Event, LinkType, Options, Parser, Tag}; |
| use regex::Regex; |
| use std::collections::HashMap; |
| use std::fmt::Write as _; |
| use std::fs; |
| use std::ops::Range; |
| use std::path::PathBuf; |
| use std::process::Command; |
| use tempfile::TempDir; |
| |
| /// The Regex used to extract the std links from the HTML generated by rustdoc. |
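| ///
| /// Rustdoc renders each `//! - LINK: [...]` line of the generated source as
| /// a list item, so the captured group is expected to look roughly like
| /// `<a href="https://doc.rust-lang.org/...">OsString</a>` (illustrative; the
| /// exact markup comes from rustdoc).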
| static STD_LINK_EXTRACT_RE: Lazy<Regex> = |
| Lazy::new(|| Regex::new(r#"<li>LINK: (.*)</li>"#).unwrap()); |
| |
| /// The Regex used to extract the URL from an HTML link. |
| static ANCHOR_URL: Lazy<Regex> = Lazy::new(|| Regex::new("<a href=\"([^\"]+)\"").unwrap()); |
| |
| /// Regex for a markdown inline link, like `[foo](bar)`. |
| static MD_LINK_INLINE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?s)(\[.+\])(\(.+\))").unwrap()); |
| /// Regex for a markdown reference link, like `[foo][bar]`. |
| static MD_LINK_REFERENCE: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?s)(\[.+\])(\[.*\])").unwrap()); |
| /// Regex for a markdown shortcut link, like `[foo]`. |
| static MD_LINK_SHORTCUT: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?s)(\[.+\])").unwrap()); |
| |
| /// Converts links to the standard library into links to the online
| /// documentation, in a fashion similar to rustdoc's intra-doc links.
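| ///
| /// For example, a link written as `` [`OsString`](std::ffi::OsString) `` in
| /// a chapter two directories deep ends up rewritten as an inline link such
| /// as `` [`OsString`](../../std/ffi/struct.OsString.html) `` (the exact path
| /// depends on the chapter's location and the `SPEC_RELATIVE` setting).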
| pub fn std_links(book: &mut Book, diag: &mut Diagnostics) { |
| // Collect all links in all chapters. |
| let mut chapter_links = HashMap::new(); |
| for item in book.iter() { |
| let BookItem::Chapter(ch) = item else { |
| continue; |
| }; |
| if ch.is_draft_chapter() { |
| continue; |
| } |
| let key = ch.source_path.as_ref().unwrap(); |
| chapter_links.insert(key, collect_markdown_links(&ch, diag)); |
| } |
| // Write a Rust source file to use with rustdoc to generate intra-doc links. |
| let tmp = TempDir::with_prefix("mdbook-spec-").unwrap(); |
| if let Err(e) = run_rustdoc(&tmp, &chapter_links, diag) { |
| warn_or_err!(diag, "{e:?}"); |
| return; |
| } |
| |
| // Extract the links from the generated html. |
| let generated = fs::read_to_string(tmp.path().join("doc/a/index.html")) |
| .expect("index.html failed to generate"); |
| let mut urls: Vec<_> = STD_LINK_EXTRACT_RE |
| .captures_iter(&generated) |
| .map(|cap| cap.get(1).unwrap().as_str()) |
| .collect(); |
| let mut urls = &mut urls[..]; |
| let expected_len: usize = chapter_links.values().map(|l| l.len()).sum(); |
| if urls.len() != expected_len { |
| bug!( |
| "expected rustdoc to generate {} links, but found {}", |
| expected_len, |
| urls.len(), |
| ); |
| } |
| // Unflatten the urls list so that it is split back by chapter. |
| let mut ch_urls: HashMap<&PathBuf, Vec<_>> = HashMap::new(); |
| for (ch_path, links) in &chapter_links { |
| let xs; |
| (xs, urls) = urls.split_at_mut(links.len()); |
| ch_urls.insert(ch_path, xs.into()); |
| } |
| |
| // Do this in two passes to deal with lifetimes. |
| let mut ch_contents = HashMap::new(); |
| for item in book.iter() { |
| let BookItem::Chapter(ch) = item else { |
| continue; |
| }; |
| if ch.is_draft_chapter() { |
| continue; |
| } |
| let key = ch.source_path.as_ref().unwrap(); |
| // Create a list of replacements to make in the raw markdown to point to the new url. |
| let replacements = compute_replacements(&ch, &chapter_links[key], &ch_urls[key], diag); |
| |
| let mut new_contents = ch.content.clone(); |
| for (md_link, url, range) in replacements { |
| // Convert links to be relative so that links work offline and |
| // with the linkchecker. |
| let url = relative_url(url, ch); |
| // Note that this may leave orphaned reference link definitions.
| // Ideally those would be removed as well, but pulldown_cmark doesn't
| // provide the span of the reference definition.
| new_contents.replace_range(range, &format!("{md_link}({url})")); |
| } |
| ch_contents.insert(key.clone(), new_contents); |
| } |
| |
| // Replace the content with the new content. |
| book.for_each_mut(|item| { |
| let BookItem::Chapter(ch) = item else { |
| return; |
| }; |
| if ch.is_draft_chapter() { |
| return; |
| } |
| let key = ch.source_path.as_ref().unwrap(); |
| let content = ch_contents.remove(key).unwrap(); |
| ch.content = content; |
| }); |
| } |
| |
| #[derive(Debug)] |
| struct Link<'a> { |
| link_type: LinkType, |
| /// Where the link points to, for example `std::ffi::OsString`.
| dest_url: CowStr<'a>, |
| /// The span in the original markdown where the link is located. |
| /// |
| /// Note that this is the span in the post-processed markdown (for
| /// example, after rules have been expanded), not in the markdown on disk.
| /// |
| /// Note that during translation, all links will be converted to inline |
| /// links. That means that for reference-style links, the link reference |
| /// definition will end up being ignored in the final markdown. For |
| /// example, a link like ``[`OsString`]`` with a definition |
| /// ``[`OsString`]: std::ffi::OsString`` will be converted to
| /// ``[`OsString`](https://doc.rust-lang.org/std/ffi/struct.OsString.html)``.
| range: Range<usize>, |
| } |
| |
| /// Collects all markdown links that look like they might be standard library links. |
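| ///
| /// Autolinks, `http` URLs, paths containing `.md` or `.html`, and
| /// fragment-only links starting with `#` are skipped; everything else, such
| /// as `` [`std::ffi::OsString`] `` or `[OsString](std::ffi::OsString)`, is
| /// collected for rustdoc to resolve.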
| fn collect_markdown_links<'a>(chapter: &'a Chapter, diag: &mut Diagnostics) -> Vec<Link<'a>> { |
| let mut opts = Options::empty(); |
| opts.insert(Options::ENABLE_TABLES); |
| opts.insert(Options::ENABLE_FOOTNOTES); |
| opts.insert(Options::ENABLE_STRIKETHROUGH); |
| opts.insert(Options::ENABLE_TASKLISTS); |
| opts.insert(Options::ENABLE_HEADING_ATTRIBUTES); |
| opts.insert(Options::ENABLE_SMART_PUNCTUATION); |
| |
| let mut broken_links = Vec::new(); |
| let mut links = Vec::new(); |
| |
| // Broken links are collected so that authors can write something like
| // `[std::option::Option]`, which pulldown_cmark treats as a broken
| // link but which is the normal intra-doc link syntax for rustdoc.
| let broken_link = |broken_link: BrokenLink<'_>| { |
| broken_links.push(Link { |
| link_type: broken_link.link_type, |
| // Necessary due to lifetime issues. |
| dest_url: CowStr::Boxed(broken_link.reference.into_string().into()), |
| range: broken_link.span.clone(), |
| }); |
| None |
| }; |
| |
| let parser = Parser::new_with_broken_link_callback(&chapter.content, opts, Some(broken_link)) |
| .into_offset_iter(); |
| for (event, range) in parser { |
| match event { |
| Event::Start(Tag::Link { |
| link_type, |
| dest_url, |
| title, |
| id: _, |
| }) => { |
| // Only collect links that are for the standard library. |
| if matches!(link_type, LinkType::Autolink | LinkType::Email) { |
| continue; |
| } |
| if dest_url.starts_with("http") |
| || dest_url.contains(".md") |
| || dest_url.contains(".html") |
| || dest_url.starts_with('#') |
| { |
| continue; |
| } |
| if !title.is_empty() { |
| warn_or_err!( |
| diag, |
| "titles in links are not supported\n\ |
| Link {dest_url} has title `{title}` found in chapter {} ({:?})", |
| chapter.name, |
| chapter.source_path.as_ref().unwrap() |
| ); |
| } |
| links.push(Link { |
| link_type, |
| dest_url, |
| range: range.clone(), |
| }); |
| } |
| _ => {} |
| } |
| } |
| links.extend(broken_links); |
| links |
| } |
| |
| /// Generates links using rustdoc. |
| /// |
| /// This takes the given links and creates a temporary Rust source file |
| /// containing those links within doc-comments, and then runs rustdoc to |
| /// generate intra-doc links on them. |
| /// |
| /// The output will be in the given `tmp` directory. |
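| ///
| /// The generated source looks roughly like this (one `LINK` line per
| /// collected link; illustrative only):
| ///
| /// ```text
| /// #![deny(rustdoc::broken_intra_doc_links)]
| /// #![allow(rustdoc::redundant_explicit_links)]
| /// //! - LINK: [std::ffi::OsString]
| /// //! - LINK: [std::option::Option]
| /// extern crate alloc;
| /// extern crate proc_macro;
| /// extern crate test;
| /// ```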
| fn run_rustdoc( |
| tmp: &TempDir, |
| chapter_links: &HashMap<&PathBuf, Vec<Link<'_>>>, |
| diag: &mut Diagnostics, |
| ) -> Result<()> { |
| let src_path = tmp.path().join("a.rs"); |
| // Allow redundant explicit links since there could be some in-scope
| // items that make the explicit paths technically unnecessary (like
| // [`Option`](std::option::Option)), but we don't care about that here.
| let mut src = format!( |
| "#![{}(rustdoc::broken_intra_doc_links)]\n\ |
| #![allow(rustdoc::redundant_explicit_links)]\n", |
| if diag.deny_warnings { "deny" } else { "warn" } |
| ); |
| // The links are written as a list to make it easy to pull them back
| // out of the generated HTML.
| for (_ch_path, links) in chapter_links { |
| for link in links { |
| match link.link_type { |
| LinkType::Inline |
| | LinkType::Reference |
| | LinkType::Collapsed |
| | LinkType::Shortcut => { |
| writeln!(src, "//! - LINK: [{}]", link.dest_url).unwrap(); |
| } |
| LinkType::ReferenceUnknown |
| | LinkType::CollapsedUnknown |
| | LinkType::ShortcutUnknown => { |
| // These only occur when a broken-link callback returns a replacement,
| // which ours never does, so reaching this arm is a bug.
| bug!("unexpected unknown link type {link:?}");
| } |
| LinkType::Autolink | LinkType::Email => { |
| bug!("link type should have been filtered {link:?}"); |
| } |
| } |
| } |
| } |
| // Put some common things into scope so that links to them work. |
| writeln!( |
| src, |
| "extern crate alloc;\n\ |
| extern crate proc_macro;\n\ |
| extern crate test;\n" |
| ) |
| .unwrap(); |
| fs::write(&src_path, &src).unwrap(); |
| let rustdoc = std::env::var("RUSTDOC").unwrap_or_else(|_| "rustdoc".into()); |
| let output = Command::new(rustdoc) |
| .arg("--edition=2024") |
| .arg(&src_path) |
| .current_dir(tmp.path()) |
| .output() |
| .expect("rustdoc installed"); |
| if !output.status.success() { |
| let stderr = String::from_utf8_lossy(&output.stderr); |
| bail!( |
| "failed to extract std links ({:?})\n{stderr}", |
| output.status |
| ); |
| } |
| Ok(()) |
| } |
| |
| static DOC_URL: Lazy<Regex> = Lazy::new(|| { |
| Regex::new(r"^https://doc.rust-lang.org/(?:nightly|beta|stable|dev|1\.[0-9]+\.[0-9]+)").unwrap() |
| }); |
| |
| /// Converts a URL to doc.rust-lang.org to be relative. |
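| ///
| /// For example, with a chapter at `expressions/loop-expr.md`, a URL like
| /// `https://doc.rust-lang.org/nightly/std/ops/trait.Drop.html` becomes
| /// `../../std/ops/trait.Drop.html`: one `..` per path component, which also
| /// steps out of the book's own directory on doc.rust-lang.org.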
| fn relative_url(url: &str, chapter: &Chapter) -> String { |
| // Set SPEC_RELATIVE=0 to disable this, which can be useful for working locally. |
| if std::env::var("SPEC_RELATIVE").as_deref() != Ok("0") { |
| let Some(url_start) = DOC_URL.shortest_match(url) else { |
| bug!("expected rustdoc URL to start with {DOC_URL:?}, got {url}"); |
| }; |
| let url_path = &url[url_start..]; |
| let num_dots = chapter.path.as_ref().unwrap().components().count(); |
| let dots = vec![".."; num_dots].join("/"); |
| format!("{dots}{url_path}") |
| } else { |
| url.to_string() |
| } |
| } |
| |
| /// Computes the replacements to make in the markdown content. |
| /// |
| /// Returns a `Vec` of `(md_link, url, range)` where: |
| /// |
| /// - `md_link` is the bracketed link text to keep in the output (like `[foo]`).
| /// - `url` is the URL to the standard library. |
| /// - `range` is the range in the original markdown to replace with the new link. |
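| ///
| /// For example, a shortcut link `` [`OsString`] `` that rustdoc resolved
| /// would yield a tuple of (`` [`OsString`] ``, the `struct.OsString.html`
| /// URL, the span of the original link in the chapter).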
| fn compute_replacements<'a>( |
| chapter: &'a Chapter, |
| links: &[Link<'_>], |
| urls: &[&'a str], |
| diag: &mut Diagnostics, |
| ) -> Vec<(&'a str, &'a str, Range<usize>)> { |
| let mut replacements = Vec::new(); |
| |
| for (url, link) in urls.iter().zip(links) { |
| let Some(cap) = ANCHOR_URL.captures(url) else { |
| let line = super::line_from_range(&chapter.content, &link.range); |
| warn_or_err!( |
| diag, |
| "broken markdown link found in {}\n\ |
| Line is: {line}\n\ |
| Link to `{}` could not be resolved by rustdoc to a known URL (result was `{}`).\n", |
| chapter.source_path.as_ref().unwrap().display(), |
| link.dest_url, |
| url |
| ); |
| continue; |
| }; |
| let url = cap.get(1).unwrap().as_str(); |
| let md_link = &chapter.content[link.range.clone()]; |
| |
| let range = link.range.clone(); |
| let add_link = |re: &Regex| { |
| let Some(cap) = re.captures(md_link) else { |
| bug!( |
| "expected link `{md_link}` of type {:?} to match regex {re}", |
| link.link_type |
| ); |
| }; |
| let md_link = cap.get(1).unwrap().as_str(); |
| replacements.push((md_link, url, range)); |
| }; |
| |
| match link.link_type { |
| LinkType::Inline => { |
| add_link(&MD_LINK_INLINE); |
| } |
| LinkType::Reference | LinkType::Collapsed => { |
| add_link(&MD_LINK_REFERENCE); |
| } |
| LinkType::Shortcut => { |
| add_link(&MD_LINK_SHORTCUT); |
| } |
| _ => { |
| bug!("unexpected link type: {link:#?}"); |
| } |
| } |
| } |
| // Sort in reverse order of position so that the replacements can be
| // applied bottom-up without shifting the ranges of earlier links.
| replacements.sort_by(|a, b| b.2.start.cmp(&a.2.start));
| replacements |
| } |