blob: 70b73cc29b72329bd93ce102adcf279051bd0b83 [file] [log] [blame] [edit]
//! This module provides a syntax highlighter for Rust code.
//! It is used by the `rustc --explain` command.
//!
//! The syntax highlighter uses `rustc_lexer`'s `tokenize`
//! function to parse the Rust code into a `Vec` of tokens.
//! The highlighter then highlights the tokens in the `Vec`,
//! and writes the highlighted output to the buffer.
use std::io::{self, Write};
use anstyle::{AnsiColor, Color, Effects, Style};
use rustc_lexer::{LiteralKind, strip_shebang, tokenize};
const PRIMITIVE_TYPES: &'static [&str] = &[
"i8", "i16", "i32", "i64", "i128", "isize", // signed integers
"u8", "u16", "u32", "u64", "u128", "usize", // unsigned integers
"f32", "f64", // floating point
"char", "bool", // others
];
const KEYWORDS: &'static [&str] = &[
"static", "struct", "super", "trait", "true", "type", "unsafe", "use", "where", "while", "as",
"async", "await", "break", "const", "continue", "crate", "dyn", "else", "enum", "extern",
"false", "fn", "for", "if", "impl", "in", "let", "loop", "match", "mod", "move", "mut", "pub",
"ref",
];
const STR_LITERAL_COLOR: AnsiColor = AnsiColor::Green;
const OTHER_LITERAL_COLOR: AnsiColor = AnsiColor::BrightRed;
const DERIVE_COLOR: AnsiColor = AnsiColor::BrightRed;
const KEYWORD_COLOR: AnsiColor = AnsiColor::BrightMagenta;
const TYPE_COLOR: AnsiColor = AnsiColor::Yellow;
const FUNCTION_COLOR: AnsiColor = AnsiColor::Blue;
const USE_COLOR: AnsiColor = AnsiColor::BrightMagenta;
const PRIMITIVE_TYPE_COLOR: AnsiColor = AnsiColor::Cyan;
/// Highlight a Rust code string and write the highlighted
/// output to the buffer. It serves as a wrapper around
/// `Highlighter::highlight_rustc_lexer`. It is passed to
/// `write_anstream_buf` in the `lib.rs` file.
pub fn highlight(code: &str, buf: &mut Vec<u8>) -> io::Result<()> {
let mut highlighter = Highlighter::default();
highlighter.highlight_rustc_lexer(code, buf)
}
/// A syntax highlighter for Rust code
/// It is used by the `rustc --explain` command.
#[derive(Default)]
pub struct Highlighter {
/// Used to track if the previous token was a token
/// that warrants the next token to be colored differently
///
/// For example, the keyword `fn` requires the next token
/// (the function name) to be colored differently.
prev_was_special: bool,
/// Used to track the length of tokens that have been
/// written so far. This is used to find the original
/// lexeme for a token from the code string.
len_accum: usize,
}
impl Highlighter {
/// Create a new highlighter
pub fn new() -> Self {
Self::default()
}
/// Highlight a Rust code string and write the highlighted
/// output to the buffer.
pub fn highlight_rustc_lexer(&mut self, code: &str, buf: &mut Vec<u8>) -> io::Result<()> {
use rustc_lexer::TokenKind;
// Remove shebang from code string
let stripped_idx = strip_shebang(code).unwrap_or(0);
let stripped_code = &code[stripped_idx..];
self.len_accum = stripped_idx;
let len_accum = &mut self.len_accum;
let tokens = tokenize(stripped_code, rustc_lexer::FrontmatterAllowed::No);
for token in tokens {
let len = token.len as usize;
// If the previous token was a special token, and this token is
// not a whitespace token, then it should be colored differently
let token_str = &code[*len_accum..*len_accum + len];
if self.prev_was_special {
if token_str != " " {
self.prev_was_special = false;
}
let style = Style::new().fg_color(Some(Color::Ansi(AnsiColor::Blue)));
write!(buf, "{style}{token_str}{style:#}")?;
*len_accum += len;
continue;
}
match token.kind {
TokenKind::Ident => {
let mut style = Style::new();
// Match if an identifier is a (well-known) keyword
if KEYWORDS.contains(&token_str) {
if token_str == "fn" {
self.prev_was_special = true;
}
style = style.fg_color(Some(Color::Ansi(KEYWORD_COLOR)));
}
// The `use` keyword is colored differently
if matches!(token_str, "use") {
style = style.fg_color(Some(Color::Ansi(USE_COLOR)));
}
// This heuristic test is to detect if the identifier is
// a function call. If it is, then the function identifier is
// colored differently.
if code[*len_accum..*len_accum + len + 1].ends_with('(') {
style = style.fg_color(Some(Color::Ansi(FUNCTION_COLOR)));
}
// The `derive` keyword is colored differently.
if token_str == "derive" {
style = style.fg_color(Some(Color::Ansi(DERIVE_COLOR)));
}
// This heuristic test is to detect if the identifier is
// a type. If it is, then the identifier is colored differently.
if matches!(token_str.chars().next().map(|c| c.is_uppercase()), Some(true)) {
style = style.fg_color(Some(Color::Ansi(TYPE_COLOR)));
}
// This if statement is to detect if the identifier is a primitive type.
if PRIMITIVE_TYPES.contains(&token_str) {
style = style.fg_color(Some(Color::Ansi(PRIMITIVE_TYPE_COLOR)));
}
write!(buf, "{style}{token_str}{style:#}")?;
}
// Color literals
TokenKind::Literal { kind, suffix_start: _ } => {
// Strings -> Green
// Chars -> Green
// Raw strings -> Green
// C strings -> Green
// Byte Strings -> Green
// Other literals -> Bright Red (Orage-esque)
let style = match kind {
LiteralKind::Str { terminated: _ }
| LiteralKind::Char { terminated: _ }
| LiteralKind::RawStr { n_hashes: _ }
| LiteralKind::CStr { terminated: _ } => {
Style::new().fg_color(Some(Color::Ansi(STR_LITERAL_COLOR)))
}
_ => Style::new().fg_color(Some(Color::Ansi(OTHER_LITERAL_COLOR))),
};
write!(buf, "{style}{token_str}{style:#}")?;
}
_ => {
// All other tokens are dimmed
let style = Style::new()
.fg_color(Some(Color::Ansi(AnsiColor::BrightWhite)))
.effects(Effects::DIMMED);
write!(buf, "{style}{token_str}{style:#}")?;
}
}
*len_accum += len;
}
Ok(())
}
}