| use rustc::lint::*; |
| use syntax::ast; |
| use syntax::codemap::{Span, BytePos}; |
| use utils::span_lint; |
| |
| /// **What it does:** Checks for the presence of `_`, `::` or camel-case words |
| /// outside ticks in documentation. |
| /// |
| /// **Why is this bad?** *Rustdoc* supports markdown formatting; `_`, `::` and |
| /// camel-case probably indicate some code which should be included between |
| /// ticks. `_` can also be used for emphasis in markdown, and this lint tries to |
| /// take that into account. |
| /// |
| /// **Known problems:** Lots of bad docs won’t be fixed, what the lint checks |
| /// for is limited, and there are still false positives. |
| /// |
| /// **Examples:** |
| /// ```rust |
| /// /// Do something with the foo_bar parameter. See also that::other::module::foo. |
| /// // ^ `foo_bar` and `that::other::module::foo` should be ticked. |
| /// fn doit(foo_bar) { .. } |
| /// ``` |
| declare_lint! { |
| pub DOC_MARKDOWN, |
| Warn, |
| "presence of `_`, `::` or camel-case outside backticks in documentation" |
| } |
| |
/// Lint pass looking for words in documentation that look like code but are not between ticks.
#[derive(Clone)]
pub struct Doc {
    /// Words that must never be reported, compared verbatim against each word found.
    valid_idents: Vec<String>,
}

impl Doc {
    /// Builds the pass from the list of words that must not be linted.
    pub fn new(valid_idents: Vec<String>) -> Self {
        let pass = Doc { valid_idents: valid_idents };
        pass
    }
}
| |
| impl LintPass for Doc { |
| fn get_lints(&self) -> LintArray { |
| lint_array![DOC_MARKDOWN] |
| } |
| } |
| |
| impl EarlyLintPass for Doc { |
| fn check_crate(&mut self, cx: &EarlyContext, krate: &ast::Crate) { |
| check_attrs(cx, &self.valid_idents, &krate.attrs); |
| } |
| |
| fn check_item(&mut self, cx: &EarlyContext, item: &ast::Item) { |
| check_attrs(cx, &self.valid_idents, &item.attrs); |
| } |
| } |
| |
| /// Cleanup documentation decoration (`///` and such). |
| /// |
| /// We can't use `syntax::attr::AttributeMethods::with_desugared_doc` or |
| /// `syntax::parse::lexer::comments::strip_doc_comment_decoration` because we need to keep track of |
| /// the span but this function is inspired from the later. |
| #[allow(cast_possible_truncation)] |
| pub fn strip_doc_comment_decoration((comment, span): (String, Span)) -> Vec<(String, Span)> { |
| // one-line comments lose their prefix |
| const ONELINERS: &'static [&'static str] = &["///!", "///", "//!", "//"]; |
| for prefix in ONELINERS { |
| if comment.starts_with(*prefix) { |
| return vec![(comment[prefix.len()..].to_owned(), |
| Span { lo: span.lo + BytePos(prefix.len() as u32), ..span })]; |
| } |
| } |
| |
| if comment.starts_with("/*") { |
| return comment[3..comment.len() - 2] |
| .lines() |
| .map(|line| { |
| let offset = line.as_ptr() as usize - comment.as_ptr() as usize; |
| debug_assert_eq!(offset as u32 as usize, offset); |
| |
| (line.to_owned(), Span { lo: span.lo + BytePos(offset as u32), ..span }) |
| }) |
| .collect(); |
| } |
| |
| panic!("not a doc-comment: {}", comment); |
| } |
| |
| pub fn check_attrs<'a>(cx: &EarlyContext, valid_idents: &[String], attrs: &'a [ast::Attribute]) { |
| let mut docs = vec![]; |
| |
| for attr in attrs { |
| if attr.is_sugared_doc { |
| if let Some(ref doc) = attr.value_str() { |
| let doc = (*doc.as_str()).to_owned(); |
| docs.extend_from_slice(&strip_doc_comment_decoration((doc, attr.span))); |
| } |
| } |
| } |
| |
| if !docs.is_empty() { |
| let _ = check_doc(cx, valid_idents, &docs); |
| } |
| } |
| |
| #[allow(while_let_loop)] // #362 |
| fn check_doc(cx: &EarlyContext, valid_idents: &[String], docs: &[(String, Span)]) -> Result<(), ()> { |
| // In markdown, `_` can be used to emphasize something, or, is a raw `_` depending on context. |
| // There really is no markdown specification that would disambiguate this properly. This is |
| // what GitHub and Rustdoc do: |
| // |
| // foo_bar test_quz → foo_bar test_quz |
| // foo_bar_baz → foo_bar_baz (note that the “official” spec says this should be emphasized) |
| // _foo bar_ test_quz_ → <em>foo bar</em> test_quz_ |
| // \_foo bar\_ → _foo bar_ |
| // (_baz_) → (<em>baz</em>) |
| // foo _ bar _ baz → foo _ bar _ baz |
| |
| /// Character that can appear in a path |
| fn is_path_char(c: char) -> bool { |
| match c { |
| t if t.is_alphanumeric() => true, |
| ':' | '_' => true, |
| _ => false, |
| } |
| } |
| |
| #[derive(Clone, Debug)] |
| /// This type is used to iterate through the documentation characters, keeping the span at the |
| /// same time. |
| struct Parser<'a> { |
| /// First byte of the current potential match |
| current_word_begin: usize, |
| /// List of lines and their associated span |
| docs: &'a [(String, Span)], |
| /// Index of the current line we are parsing |
| line: usize, |
| /// Whether we are in a link |
| link: bool, |
| /// Whether we are at the beginning of a line |
| new_line: bool, |
| /// Whether we were to the end of a line last time `next` was called |
| reset: bool, |
| /// The position of the current character within the current line |
| pos: usize, |
| } |
| |
| impl<'a> Parser<'a> { |
| fn advance_begin(&mut self) { |
| self.current_word_begin = self.pos; |
| } |
| |
| fn line(&self) -> (&'a str, Span) { |
| let (ref doc, span) = self.docs[self.line]; |
| (doc, span) |
| } |
| |
| fn peek(&self) -> Option<char> { |
| self.line().0[self.pos..].chars().next() |
| } |
| |
| #[allow(while_let_on_iterator)] // borrowck complains about for |
| fn jump_to(&mut self, n: char) -> Result<bool, ()> { |
| while let Some((new_line, c)) = self.next() { |
| if c == n { |
| self.advance_begin(); |
| return Ok(new_line); |
| } |
| } |
| |
| Err(()) |
| } |
| |
| fn next_line(&mut self) { |
| self.pos = 0; |
| self.current_word_begin = 0; |
| self.line += 1; |
| self.new_line = true; |
| } |
| |
| fn put_back(&mut self, c: char) { |
| self.pos -= c.len_utf8(); |
| } |
| |
| #[allow(cast_possible_truncation)] |
| fn word(&self) -> (&'a str, Span) { |
| let begin = self.current_word_begin; |
| let end = self.pos; |
| |
| debug_assert_eq!(end as u32 as usize, end); |
| debug_assert_eq!(begin as u32 as usize, begin); |
| |
| let (doc, mut span) = self.line(); |
| span.hi = span.lo + BytePos(end as u32); |
| span.lo = span.lo + BytePos(begin as u32); |
| |
| (&doc[begin..end], span) |
| } |
| } |
| |
| impl<'a> Iterator for Parser<'a> { |
| type Item = (bool, char); |
| |
| fn next(&mut self) -> Option<(bool, char)> { |
| while self.line < self.docs.len() { |
| if self.reset { |
| self.line += 1; |
| self.reset = false; |
| self.pos = 0; |
| self.current_word_begin = 0; |
| } |
| |
| let mut chars = self.line().0[self.pos..].chars(); |
| let c = chars.next(); |
| |
| if let Some(c) = c { |
| self.pos += c.len_utf8(); |
| let new_line = self.new_line; |
| self.new_line = c == '\n' || (self.new_line && c.is_whitespace()); |
| return Some((new_line, c)); |
| } else if self.line == self.docs.len() - 1 { |
| return None; |
| } else { |
| self.new_line = true; |
| self.reset = true; |
| self.pos += 1; |
| return Some((true, '\n')); |
| } |
| } |
| |
| None |
| } |
| } |
| |
| let mut parser = Parser { |
| current_word_begin: 0, |
| docs: docs, |
| line: 0, |
| link: false, |
| new_line: true, |
| reset: false, |
| pos: 0, |
| }; |
| |
| /// Check for fanced code block. |
| macro_rules! check_block { |
| ($parser:expr, $c:tt, $new_line:expr) => {{ |
| check_block!($parser, $c, $c, $new_line) |
| }}; |
| |
| ($parser:expr, $c:pat, $c_expr:expr, $new_line:expr) => {{ |
| fn check_block(parser: &mut Parser, new_line: bool) -> Result<bool, ()> { |
| if new_line { |
| let mut lookup_parser = parser.clone(); |
| if let (Some((false, $c)), Some((false, $c))) = (lookup_parser.next(), lookup_parser.next()) { |
| *parser = lookup_parser; |
| // 3 or more ` or ~ open a code block to be closed with the same number of ` or ~ |
| let mut open_count = 3; |
| while let Some((false, $c)) = parser.next() { |
| open_count += 1; |
| } |
| |
| loop { |
| loop { |
| if try!(parser.jump_to($c_expr)) { |
| break; |
| } |
| } |
| |
| lookup_parser = parser.clone(); |
| let a = lookup_parser.next(); |
| let b = lookup_parser.next(); |
| if let (Some((false, $c)), Some((false, $c))) = (a, b) { |
| let mut close_count = 3; |
| while let Some((false, $c)) = lookup_parser.next() { |
| close_count += 1; |
| } |
| |
| if close_count == open_count { |
| *parser = lookup_parser; |
| return Ok(true); |
| } |
| } |
| } |
| } |
| } |
| |
| Ok(false) |
| } |
| |
| check_block(&mut $parser, $new_line) |
| }}; |
| } |
| |
| loop { |
| match parser.next() { |
| Some((new_line, c)) => { |
| match c { |
| '#' if new_line => { |
| // don’t warn on titles |
| parser.next_line(); |
| }, |
| '`' => { |
| if try!(check_block!(parser, '`', new_line)) { |
| continue; |
| } |
| |
| // not a code block, just inline code |
| try!(parser.jump_to('`')); |
| }, |
| '~' => { |
| if try!(check_block!(parser, '~', new_line)) { |
| continue; |
| } |
| |
| // ~ does not introduce inline code, but two of them introduce |
| // strikethrough. Too bad for the consistency but we don't care about |
| // strikethrough. |
| }, |
| '[' => { |
| // Check for a reference definition `[foo]:` at the beginning of a line |
| let mut link = true; |
| |
| if new_line { |
| let mut lookup_parser = parser.clone(); |
| if lookup_parser.any(|(_, c)| c == ']') { |
| if let Some((_, ':')) = lookup_parser.next() { |
| lookup_parser.next_line(); |
| parser = lookup_parser; |
| link = false; |
| } |
| } |
| } |
| |
| parser.advance_begin(); |
| parser.link = link; |
| }, |
| ']' if parser.link => { |
| parser.link = false; |
| |
| match parser.peek() { |
| Some('(') => { |
| try!(parser.jump_to(')')); |
| }, |
| Some('[') => { |
| try!(parser.jump_to(']')); |
| }, |
| Some(_) => continue, |
| None => return Err(()), |
| } |
| }, |
| c if !is_path_char(c) => { |
| parser.advance_begin(); |
| }, |
| _ => { |
| if let Some((_, c)) = parser.find(|&(_, c)| !is_path_char(c)) { |
| parser.put_back(c); |
| } |
| |
| let (word, span) = parser.word(); |
| check_word(cx, valid_idents, word, span); |
| parser.advance_begin(); |
| }, |
| } |
| |
| }, |
| None => break, |
| } |
| } |
| |
| Ok(()) |
| } |
| |
| fn check_word(cx: &EarlyContext, valid_idents: &[String], word: &str, span: Span) { |
| /// Checks if a string a camel-case, ie. contains at least two uppercase letter (`Clippy` is |
| /// ok) and one lower-case letter (`NASA` is ok). Plural are also excluded (`IDs` is ok). |
| fn is_camel_case(s: &str) -> bool { |
| if s.starts_with(|c: char| c.is_digit(10)) { |
| return false; |
| } |
| |
| let s = if s.ends_with('s') { |
| &s[..s.len() - 1] |
| } else { |
| s |
| }; |
| |
| s.chars().all(char::is_alphanumeric) && s.chars().filter(|&c| c.is_uppercase()).take(2).count() > 1 && |
| s.chars().filter(|&c| c.is_lowercase()).take(1).count() > 0 |
| } |
| |
| fn has_underscore(s: &str) -> bool { |
| s != "_" && !s.contains("\\_") && s.contains('_') |
| } |
| |
| // Trim punctuation as in `some comment (see foo::bar).` |
| // ^^ |
| // Or even as in `_foo bar_` which is emphasized. |
| let word = word.trim_matches(|c: char| !c.is_alphanumeric()); |
| |
| if valid_idents.iter().any(|i| i == word) { |
| return; |
| } |
| |
| if has_underscore(word) || word.contains("::") || is_camel_case(word) { |
| span_lint(cx, |
| DOC_MARKDOWN, |
| span, |
| &format!("you should put `{}` between ticks in the documentation", word)); |
| } |
| } |