blob: cc5e8a24d0d3a9bf03e771ab71ec2171078e8d1f [file] [log] [blame]
//! Support for rendering the grammar.
use crate::{Diagnostics, warn_or_err};
use mdbook::book::{Book, BookItem, Chapter};
use regex::{Captures, Regex};
use std::collections::{HashMap, HashSet};
use std::fmt::Write;
use std::path::PathBuf;
use std::sync::LazyLock;
mod parser;
mod render_markdown;
mod render_railroad;
#[derive(Debug, Default)]
pub struct Grammar {
pub productions: HashMap<String, Production>,
/// The order that the production names were discovered.
pub name_order: Vec<String>,
}
#[derive(Debug)]
pub struct Production {
name: String,
/// Category is from the markdown lang string, and defines how it is
/// grouped and organized on the summary page.
category: String,
expression: Expression,
/// The path to the chapter where this is defined.
path: PathBuf,
is_root: bool,
}
#[derive(Clone, Debug)]
struct Expression {
kind: ExpressionKind,
/// Suffix is the `_foo_` part that is shown as a subscript.
suffix: Option<String>,
/// A footnote is a markdown footnote link.
footnote: Option<String>,
}
#[derive(Clone, Debug)]
enum ExpressionKind {
/// `( A B C )`
Grouped(Box<Expression>),
/// `A | B | C`
Alt(Vec<Expression>),
/// `A B C`
Sequence(Vec<Expression>),
/// `A?`
Optional(Box<Expression>),
/// `A*`
Repeat(Box<Expression>),
/// `A*?`
RepeatNonGreedy(Box<Expression>),
/// `A+`
RepeatPlus(Box<Expression>),
/// `A+?`
RepeatPlusNonGreedy(Box<Expression>),
/// `A{2..4}`
RepeatRange(Box<Expression>, Option<u32>, Option<u32>),
/// `NonTerminal`
Nt(String),
/// `` `string` ``
Terminal(String),
/// `<english description>`
Prose(String),
/// An LF followed by the given number of spaces.
///
/// Used by the renderer to help format and structure the grammar.
Break(usize),
/// ``[`A`-`Z` `_` LF]``
Charset(Vec<Characters>),
/// ``~[` ` LF]``
NegExpression(Box<Expression>),
/// `U+0060`
Unicode(String),
}
#[derive(Clone, Debug)]
enum Characters {
/// `LF`
Named(String),
/// `` `_` ``
Terminal(String),
/// `` `A`-`Z` ``
Range(char, char),
}
#[derive(Debug)]
pub struct RenderCtx {
md_link_map: HashMap<String, String>,
rr_link_map: HashMap<String, String>,
for_summary: bool,
}
impl Grammar {
fn visit_nt(&self, callback: &mut dyn FnMut(&str)) {
for p in self.productions.values() {
p.expression.visit_nt(callback);
}
}
}
impl Expression {
fn new_kind(kind: ExpressionKind) -> Self {
Self {
kind,
suffix: None,
footnote: None,
}
}
fn visit_nt(&self, callback: &mut dyn FnMut(&str)) {
match &self.kind {
ExpressionKind::Grouped(e)
| ExpressionKind::Optional(e)
| ExpressionKind::Repeat(e)
| ExpressionKind::RepeatNonGreedy(e)
| ExpressionKind::RepeatPlus(e)
| ExpressionKind::RepeatPlusNonGreedy(e)
| ExpressionKind::RepeatRange(e, _, _)
| ExpressionKind::NegExpression(e) => {
e.visit_nt(callback);
}
ExpressionKind::Alt(es) | ExpressionKind::Sequence(es) => {
for e in es {
e.visit_nt(callback);
}
}
ExpressionKind::Nt(nt) => {
callback(&nt);
}
ExpressionKind::Terminal(_)
| ExpressionKind::Prose(_)
| ExpressionKind::Break(_)
| ExpressionKind::Unicode(_) => {}
ExpressionKind::Charset(set) => {
for ch in set {
match ch {
Characters::Named(s) => callback(s),
Characters::Terminal(_) | Characters::Range(_, _) => {}
}
}
}
}
}
fn is_break(&self) -> bool {
matches!(self.kind, ExpressionKind::Break(_))
}
}
static GRAMMAR_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"(?ms)^```grammar,([^\n]+)\n(.*?)^```").unwrap());
static NAMES_RE: LazyLock<Regex> =
LazyLock::new(|| Regex::new(r"(?m)^(?:@root )?([A-Za-z0-9_]+)(?: \([^)]+\))? ->").unwrap());
/// Loads the [`Grammar`] from the book.
pub fn load_grammar(book: &Book, diag: &mut Diagnostics) -> Grammar {
let mut grammar = Grammar::default();
for item in book.iter() {
let BookItem::Chapter(ch) = item else {
continue;
};
if ch.is_draft_chapter() {
continue;
}
let path = ch.path.as_ref().unwrap().to_owned();
for cap in GRAMMAR_RE.captures_iter(&ch.content) {
let category = &cap[1];
let input = &cap[2];
if let Err(e) = parser::parse_grammar(input, &mut grammar, category, &path) {
warn_or_err!(diag, "failed to parse grammar in {path:?}: {e}");
}
}
}
check_undefined_nt(&grammar, diag);
check_unexpected_roots(&grammar, diag);
grammar
}
/// Checks for nonterminals that are used but not defined.
fn check_undefined_nt(grammar: &Grammar, diag: &mut Diagnostics) {
grammar.visit_nt(&mut |nt| {
if !grammar.productions.contains_key(nt) {
warn_or_err!(diag, "non-terminal `{nt}` is used but not defined");
}
});
}
/// This checks that all the grammar roots are what we expect.
///
/// This is intended to help catch any unexpected misspellings, orphaned
/// productions, or general mistakes.
fn check_unexpected_roots(grammar: &Grammar, diag: &mut Diagnostics) {
let mut set: HashSet<_> = grammar.name_order.iter().map(|s| s.as_str()).collect();
grammar.visit_nt(&mut |nt| {
set.remove(nt);
});
let expected: HashSet<_> = grammar
.productions
.values()
.filter_map(|p| p.is_root.then(|| p.name.as_str()))
.collect();
if set != expected {
let new: Vec<_> = set.difference(&expected).collect();
let removed: Vec<_> = expected.difference(&set).collect();
if !new.is_empty() {
warn_or_err!(
diag,
"New grammar production detected that is not used in any other\n\
production. If this is expected, mark the production with\n\
`@root`. If not, make sure it is spelled correctly and used in\n\
another production.\n\
\n\
The new names are: {new:?}\n"
);
} else if !removed.is_empty() {
warn_or_err!(
diag,
"Old grammar production root seems to have been removed.\n\
If this is expected, remove `@root` from the production.\n\
\n\
The removed names are: {removed:?}\n"
);
} else {
unreachable!("unexpected");
}
}
}
/// Replaces the text grammar in the given chapter with the rendered version.
pub fn insert_grammar(grammar: &Grammar, chapter: &Chapter, diag: &mut Diagnostics) -> String {
let link_map = make_relative_link_map(grammar, chapter);
let mut content = GRAMMAR_RE
.replace_all(&chapter.content, |cap: &Captures<'_>| {
let names: Vec<_> = NAMES_RE
.captures_iter(&cap[2])
.map(|cap| cap.get(1).unwrap().as_str())
.collect();
let for_lexer = &cap[1] == "lexer";
render_names(grammar, &names, &link_map, for_lexer, chapter, diag)
})
.to_string();
// Make all production names easily linkable.
let is_summary = is_summary(chapter);
for (name, path) in &link_map {
let id = render_markdown::markdown_id(name, is_summary);
if is_summary {
// On the summary page, link to the production on the summary page.
writeln!(content, "[{name}]: #{id}").unwrap();
} else {
// This includes two variants, one for convenience (like
// `[ArrayExpression]`), and one with the `grammar-` prefix to
// disambiguate links that have the same name as a rule (rules
// take precedence).
writeln!(
content,
"[{name}]: {path}#{id}\n\
[grammar-{name}]: {path}#{id}"
)
.unwrap();
}
}
content
}
/// Creates a map of production name -> relative link path.
fn make_relative_link_map(grammar: &Grammar, chapter: &Chapter) -> HashMap<String, String> {
let current_path = chapter.path.as_ref().unwrap().parent().unwrap();
grammar
.productions
.values()
.map(|p| {
let relative = pathdiff::diff_paths(&p.path, current_path).unwrap();
// Adjust paths for Windows.
let relative = relative.display().to_string().replace('\\', "/");
(p.name.clone(), relative)
})
.collect()
}
/// Helper to take a list of production names and to render all of those to a
/// mixture of markdown and HTML.
fn render_names(
grammar: &Grammar,
names: &[&str],
link_map: &HashMap<String, String>,
for_lexer: bool,
chapter: &Chapter,
diag: &mut Diagnostics,
) -> String {
let for_summary = is_summary(chapter);
let mut output = String::new();
output.push_str(
"<div class=\"grammar-container\">\n\
\n",
);
if for_lexer {
output.push_str("**<sup>Lexer</sup>**\n");
} else {
output.push_str("**<sup>Syntax</sup>**\n");
}
output.push_str("<br>\n");
// Convert the link map to add the id.
let update_link_map = |get_id: fn(&str, bool) -> String| -> HashMap<String, String> {
link_map
.iter()
.map(|(name, path)| {
let id = get_id(name, for_summary);
let path = if for_summary {
format!("#{id}")
} else {
format!("{path}#{id}")
};
(name.clone(), path)
})
.collect()
};
let render_ctx = RenderCtx {
md_link_map: update_link_map(render_markdown::markdown_id),
rr_link_map: update_link_map(render_railroad::railroad_id),
for_summary,
};
if let Err(e) = grammar.render_markdown(&render_ctx, &names, &mut output) {
warn_or_err!(
diag,
"grammar failed in chapter {:?}: {e}",
chapter.source_path.as_ref().unwrap()
);
}
output.push_str(
"\n\
<button class=\"grammar-toggle-railroad\" type=\"button\" \
title=\"Toggle railroad display\" \
onclick=\"toggle_railroad()\">\
Show Railroad\
</button>\n\
</div>\n\
<div class=\"grammar-railroad grammar-hidden\">\n\
\n",
);
if let Err(e) = grammar.render_railroad(&render_ctx, &names, &mut output) {
warn_or_err!(
diag,
"grammar failed in chapter {:?}: {e}",
chapter.source_path.as_ref().unwrap()
);
}
output.push_str("</div>\n");
output
}
pub fn is_summary(chapter: &Chapter) -> bool {
chapter.name == "Grammar summary"
}
/// Inserts the summary of all grammar rules into the grammar summary chapter.
pub fn insert_summary(grammar: &Grammar, chapter: &Chapter, diag: &mut Diagnostics) -> String {
let link_map = make_relative_link_map(grammar, chapter);
let mut seen = HashSet::new();
let categories: Vec<_> = grammar
.name_order
.iter()
.map(|name| &grammar.productions[name].category)
.filter(|cat| seen.insert(*cat))
.collect();
let mut grammar_summary = String::new();
for category in categories {
let mut chars = category.chars();
let cap = chars.next().unwrap().to_uppercase().collect::<String>() + chars.as_str();
write!(grammar_summary, "\n## {cap} summary\n\n").unwrap();
let names: Vec<_> = grammar
.name_order
.iter()
.filter(|name| grammar.productions[*name].category == *category)
.map(|s| s.as_str())
.collect();
let for_lexer = category == "lexer";
let s = render_names(grammar, &names, &link_map, for_lexer, chapter, diag);
grammar_summary.push_str(&s);
}
chapter
.content
.replace("{{ grammar-summary }}", &grammar_summary)
}