| // Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT |
| // file at the top-level directory of this distribution and at |
| // http://rust-lang.org/COPYRIGHT. |
| // |
| // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
| // option. This file may not be copied, modified, or distributed |
| // except according to those terms. |
| |
| use core::prelude::*; |
| |
| use ast; |
| use codemap::{BytePos, CharPos, CodeMap, Pos}; |
| use diagnostic; |
| use parse::lexer::{is_whitespace, get_str_from, reader}; |
| use parse::lexer::{StringReader, bump, is_eof, nextch, TokenAndSpan}; |
| use parse::lexer::{is_line_non_doc_comment, is_block_non_doc_comment}; |
| use parse::lexer; |
| use parse::token; |
| use parse; |
| |
| use core::io::ReaderUtil; |
| use core::io; |
| use core::str; |
| use core::uint; |
| use core::vec; |
| |
// Placement of a gathered comment relative to the surrounding code; stored
// in the `style` field of every `cmnt`. `blank_line` entries are not real
// comments: they only record a blank line so that layout can be kept.
| #[deriving(Eq)] |
| pub enum cmnt_style { |
| isolated, // No code on either side of each line of the comment |
| trailing, // Code exists to the left of the comment |
| mixed, // Code before /* foo */ and after the comment |
| blank_line, // Just a manual blank line "\n\n", for layout |
| } |
| |
// One entry produced while gathering comments: either the text of a comment
// or a `blank_line` placeholder.
| pub struct cmnt { |
// How the comment sits relative to code (see `cmnt_style`).
| style: cmnt_style, |
// The comment text, one element per line; empty for `blank_line` entries.
| lines: ~[~str], |
// The reader's `last_pos` captured when the entry was started.
| pos: BytePos |
| } |
| |
| pub fn is_doc_comment(s: &str) -> bool { |
| (s.starts_with(~"///") && !is_line_non_doc_comment(s)) || |
| s.starts_with(~"//!") || |
| (s.starts_with(~"/**") && !is_block_non_doc_comment(s)) || |
| s.starts_with(~"/*!") |
| } |
| |
| pub fn doc_comment_style(comment: &str) -> ast::attr_style { |
| assert!(is_doc_comment(comment)); |
| if comment.starts_with(~"//!") || comment.starts_with(~"/*!") { |
| ast::attr_inner |
| } else { |
| ast::attr_outer |
| } |
| } |
| |
| pub fn strip_doc_comment_decoration(comment: &str) -> ~str { |
| |
| /// remove whitespace-only lines from the start/end of lines |
| fn vertical_trim(lines: ~[~str]) -> ~[~str] { |
| let mut i = 0u, j = lines.len(); |
| while i < j && lines[i].trim().is_empty() { |
| i += 1u; |
| } |
| while j > i && lines[j - 1u].trim().is_empty() { |
| j -= 1u; |
| } |
| return lines.slice(i, j).to_owned(); |
| } |
| |
| // drop leftmost columns that contain only values in chars |
| fn block_trim(lines: ~[~str], chars: ~str, max: Option<uint>) -> ~[~str] { |
| |
| let mut i = max.get_or_default(uint::max_value); |
| for lines.each |line| { |
| if line.trim().is_empty() { |
| loop; |
| } |
| for line.each_chari |j, c| { |
| if j >= i { |
| break; |
| } |
| if !chars.contains_char(c) { |
| i = j; |
| break; |
| } |
| } |
| } |
| |
| return do lines.map |line| { |
| let mut chars = ~[]; |
| for str::each_char(*line) |c| { chars.push(c) } |
| if i > chars.len() { |
| ~"" |
| } else { |
| str::from_chars(chars.slice(i, chars.len()).to_owned()) |
| } |
| }; |
| } |
| |
| if comment.starts_with(~"//") { |
| // FIXME #5475: |
| // return comment.slice(3u, comment.len()).trim().to_owned(); |
| let r = comment.slice(3u, comment.len()); return r.trim().to_owned(); |
| |
| } |
| |
| if comment.starts_with(~"/*") { |
| let mut lines = ~[]; |
| for str::each_line_any(comment.slice(3u, comment.len() - 2u)) |line| { |
| lines.push(line.to_owned()) |
| } |
| let lines = vertical_trim(lines); |
| let lines = block_trim(lines, ~"\t ", None); |
| let lines = block_trim(lines, ~"*", Some(1u)); |
| let lines = block_trim(lines, ~"\t ", None); |
| return str::connect(lines, ~"\n"); |
| } |
| |
| fail!(~"not a doc-comment: " + comment); |
| } |
| |
| fn read_to_eol(rdr: @mut StringReader) -> ~str { |
| let mut val = ~""; |
| while rdr.curr != '\n' && !is_eof(rdr) { |
| str::push_char(&mut val, rdr.curr); |
| bump(rdr); |
| } |
| if rdr.curr == '\n' { bump(rdr); } |
| return val; |
| } |
| |
| fn read_one_line_comment(rdr: @mut StringReader) -> ~str { |
| let val = read_to_eol(rdr); |
| assert!((val[0] == '/' as u8 && val[1] == '/' as u8) || |
| (val[0] == '#' as u8 && val[1] == '!' as u8)); |
| return val; |
| } |
| |
| fn consume_non_eol_whitespace(rdr: @mut StringReader) { |
| while is_whitespace(rdr.curr) && rdr.curr != '\n' && !is_eof(rdr) { |
| bump(rdr); |
| } |
| } |
| |
| fn push_blank_line_comment(rdr: @mut StringReader, comments: &mut ~[cmnt]) { |
| debug!(">>> blank-line comment"); |
| let v: ~[~str] = ~[]; |
| comments.push(cmnt {style: blank_line, lines: v, pos: rdr.last_pos}); |
| } |
| |
| fn consume_whitespace_counting_blank_lines(rdr: @mut StringReader, |
| comments: &mut ~[cmnt]) { |
| while is_whitespace(rdr.curr) && !is_eof(rdr) { |
| if rdr.col == CharPos(0u) && rdr.curr == '\n' { |
| push_blank_line_comment(rdr, &mut *comments); |
| } |
| bump(rdr); |
| } |
| } |
| |
| |
| fn read_shebang_comment(rdr: @mut StringReader, code_to_the_left: bool, |
| comments: &mut ~[cmnt]) { |
| debug!(">>> shebang comment"); |
| let p = rdr.last_pos; |
| debug!("<<< shebang comment"); |
| comments.push(cmnt { |
| style: if code_to_the_left { trailing } else { isolated }, |
| lines: ~[read_one_line_comment(rdr)], |
| pos: p |
| }); |
| } |
| |
| fn read_line_comments(rdr: @mut StringReader, code_to_the_left: bool, |
| comments: &mut ~[cmnt]) { |
| debug!(">>> line comments"); |
| let p = rdr.last_pos; |
| let mut lines: ~[~str] = ~[]; |
| while rdr.curr == '/' && nextch(rdr) == '/' { |
| let line = read_one_line_comment(rdr); |
| debug!("%s", line); |
| if is_doc_comment(line) { // doc-comments are not put in comments |
| break; |
| } |
| lines.push(line); |
| consume_non_eol_whitespace(rdr); |
| } |
| debug!("<<< line comments"); |
| if !lines.is_empty() { |
| comments.push(cmnt { |
| style: if code_to_the_left { trailing } else { isolated }, |
| lines: lines, |
| pos: p |
| }); |
| } |
| } |
| |
| // FIXME #3961: This is not the right way to convert string byte |
| // offsets to characters. |
| fn all_whitespace(s: ~str, begin: uint, end: uint) -> bool { |
| let mut i: uint = begin; |
| while i != end { |
| if !is_whitespace(s[i] as char) { return false; } i += 1u; |
| } |
| return true; |
| } |
| |
| fn trim_whitespace_prefix_and_push_line(lines: &mut ~[~str], |
| s: ~str, col: CharPos) { |
| let mut s1; |
| let len = str::len(s); |
| // FIXME #3961: Doing bytewise comparison and slicing with CharPos |
| let col = col.to_uint(); |
| if all_whitespace(s, 0u, uint::min(len, col)) { |
| if col < len { |
| s1 = str::slice(s, col, len).to_owned(); |
| } else { s1 = ~""; } |
| } else { s1 = s; } |
| debug!("pushing line: %s", s1); |
| lines.push(s1); |
| } |
| |
// Reads one block comment starting at the reader's current position and
// appends it to `comments`.
//
// A comment whose third character is `*` or `!` is copied verbatim; if
// `is_block_non_doc_comment` rejects it (i.e. it is a doc comment) the
// function returns without recording anything, otherwise it is asserted to
// contain no newline and kept as a single line. Any other block comment is
// read line by line with nesting of `/* ... */` tracked, and each line has
// the indentation of the comment's starting column stripped.
//
// The entry is `trailing` when `code_to_the_left` is set and `isolated`
// otherwise, except that a one-line comment followed by more text on the
// same line is recorded as `mixed`.
| fn read_block_comment(rdr: @mut StringReader, |
| code_to_the_left: bool, |
| comments: &mut ~[cmnt]) { |
| debug!(">>> block comment"); |
| let p = rdr.last_pos; |
| let mut lines: ~[~str] = ~[]; |
// Column captured before the two bumps below; later passed to
// `trim_whitespace_prefix_and_push_line` as the prefix width to strip.
| let mut col: CharPos = rdr.col; |
// Step over two characters -- presumably the `/*` the caller peeked at;
// `curr_line` below is seeded with that same text.
| bump(rdr); |
| bump(rdr); |
| |
| let mut curr_line = ~"/*"; |
| |
| // doc-comments are not really comments, they are attributes |
| if rdr.curr == '*' || rdr.curr == '!' { |
// Copy everything up to the closing `*/` (or end of input) unchanged.
| while !(rdr.curr == '*' && nextch(rdr) == '/') && !is_eof(rdr) { |
| str::push_char(&mut curr_line, rdr.curr); |
| bump(rdr); |
| } |
// Only append and skip the terminator if one was actually found.
| if !is_eof(rdr) { |
| curr_line += ~"*/"; |
| bump(rdr); |
| bump(rdr); |
| } |
// A real doc comment is dropped here: nothing is pushed to `comments`.
| if !is_block_non_doc_comment(curr_line) { return; } |
// What remains is kept as one line, so it must not span several lines.
| assert!(!curr_line.contains_char('\n')); |
| lines.push(curr_line); |
| } else { |
// Nesting depth: every inner `/*` adds one, every `*/` removes one; the
// comment ends when the depth returns to zero.
| let mut level: int = 1; |
| while level > 0 { |
| debug!("=== block comment level %d", level); |
| if is_eof(rdr) { |
| (rdr as @reader).fatal(~"unterminated block comment"); |
| } |
| if rdr.curr == '\n' { |
// End of a source line: store what was collected and start afresh.
| trim_whitespace_prefix_and_push_line(&mut lines, curr_line, |
| col); |
| curr_line = ~""; |
| bump(rdr); |
| } else { |
// The current character is always kept; for a two-character delimiter
// the second character is appended by hand because both are bumped past.
| str::push_char(&mut curr_line, rdr.curr); |
| if rdr.curr == '/' && nextch(rdr) == '*' { |
| bump(rdr); |
| bump(rdr); |
| curr_line += ~"*"; |
| level += 1; |
| } else { |
| if rdr.curr == '*' && nextch(rdr) == '/' { |
| bump(rdr); |
| bump(rdr); |
| curr_line += ~"/"; |
| level -= 1; |
| } else { bump(rdr); } |
| } |
| } |
| } |
// Flush the final, unterminated-by-newline line of the comment.
| if str::len(curr_line) != 0 { |
| trim_whitespace_prefix_and_push_line(&mut lines, curr_line, col); |
| } |
| } |
| |
| let mut style = if code_to_the_left { trailing } else { isolated }; |
| consume_non_eol_whitespace(rdr); |
// More text follows on the same line after a one-line comment.
| if !is_eof(rdr) && rdr.curr != '\n' && vec::len(lines) == 1u { |
| style = mixed; |
| } |
| debug!("<<< block comment"); |
| comments.push(cmnt {style: style, lines: lines, pos: p}); |
| } |
| |
| fn peeking_at_comment(rdr: @mut StringReader) -> bool { |
| return ((rdr.curr == '/' && nextch(rdr) == '/') || |
| (rdr.curr == '/' && nextch(rdr) == '*')) || |
| (rdr.curr == '#' && nextch(rdr) == '!'); |
| } |
| |
| fn consume_comment(rdr: @mut StringReader, |
| code_to_the_left: bool, |
| comments: &mut ~[cmnt]) { |
| debug!(">>> consume comment"); |
| if rdr.curr == '/' && nextch(rdr) == '/' { |
| read_line_comments(rdr, code_to_the_left, comments); |
| } else if rdr.curr == '/' && nextch(rdr) == '*' { |
| read_block_comment(rdr, code_to_the_left, comments); |
| } else if rdr.curr == '#' && nextch(rdr) == '!' { |
| read_shebang_comment(rdr, code_to_the_left, comments); |
| } else { fail!(); } |
| debug!("<<< consume comment"); |
| } |
| |
// A literal token recorded while gathering comments and literals.
| pub struct lit { |
// Text obtained from `get_str_from` for the token.
| lit: ~str, |
// Low end (`lo`) of the span the reader reported for the token.
| pos: BytePos |
| } |
| |
| pub fn gather_comments_and_literals(span_diagnostic: |
| @diagnostic::span_handler, |
| +path: ~str, |
| srdr: @io::Reader) |
| -> (~[cmnt], ~[lit]) { |
| let src = @str::from_bytes(srdr.read_whole_stream()); |
| let itr = parse::token::mk_fake_ident_interner(); |
| let cm = CodeMap::new(); |
| let filemap = cm.new_filemap(path, src); |
| let rdr = lexer::new_low_level_string_reader(span_diagnostic, |
| filemap, |
| itr); |
| |
| let mut comments: ~[cmnt] = ~[]; |
| let mut literals: ~[lit] = ~[]; |
| let mut first_read: bool = true; |
| while !is_eof(rdr) { |
| loop { |
| let mut code_to_the_left = !first_read; |
| consume_non_eol_whitespace(rdr); |
| if rdr.curr == '\n' { |
| code_to_the_left = false; |
| consume_whitespace_counting_blank_lines(rdr, &mut comments); |
| } |
| while peeking_at_comment(rdr) { |
| consume_comment(rdr, code_to_the_left, &mut comments); |
| consume_whitespace_counting_blank_lines(rdr, &mut comments); |
| } |
| break; |
| } |
| |
| |
| let bstart = rdr.pos; |
| rdr.next_token(); |
| //discard, and look ahead; we're working with internal state |
| let TokenAndSpan {tok: tok, sp: sp} = rdr.peek(); |
| if token::is_lit(&tok) { |
| let s = get_str_from(rdr, bstart); |
| debug!("tok lit: %s", s); |
| literals.push(lit {lit: s, pos: sp.lo}); |
| } else { |
| debug!("tok: %s", token::to_str(rdr.interner, &tok)); |
| } |
| first_read = false; |
| } |
| |
| (comments, literals) |
| } |