use expect_test::{Expect, expect};

use super::*;

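/// Lexes `s` with a leading `r` prepended (the cursor is bumped past the `r`)
/// and checks the result of `raw_double_quoted_string`: `Ok(n)` carries the
/// number of `#` delimiters, `Err` the reason lexing failed.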
fn check_raw_str(s: &str, expected: Result<u8, RawStrError>) {
    let s = &format!("r{}", s);
    let mut cursor = Cursor::new(s, FrontmatterAllowed::No);
    cursor.bump();
    let res = cursor.raw_double_quoted_string(0);
    assert_eq!(res, expected);
}

#[test]
fn test_naked_raw_str() {
    check_raw_str(r#""abc""#, Ok(0));
}

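// A hypothetical companion case (not part of the original suite), sketched to
// make the `Ok` payload explicit: with a single `#` delimiter the helper
// reports `Ok(1)`, i.e. the number of hashes.
#[test]
fn test_single_hash_raw_str() {
    check_raw_str(r##"#"abc"#"##, Ok(1));
}
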
#[test]
fn test_raw_no_start() {
    check_raw_str(r##""abc"#"##, Ok(0));
}

#[test]
fn test_too_many_terminators() {
    // The extra trailing `#` is an error that is reported later, in the parser.
    check_raw_str(r###"#"abc"##"###, Ok(1));
}

#[test]
fn test_unterminated() {
    check_raw_str(
        r#"#"abc"#,
        Err(RawStrError::NoTerminator { expected: 1, found: 0, possible_terminator_offset: None }),
    );
    check_raw_str(
        r###"##"abc"#"###,
        Err(RawStrError::NoTerminator {
            expected: 2,
            found: 1,
            possible_terminator_offset: Some(7),
        }),
    );
    // We're looking for `"#`, not just any `#`.
    check_raw_str(
        r###"##"abc#"###,
        Err(RawStrError::NoTerminator { expected: 2, found: 0, possible_terminator_offset: None }),
    )
}

#[test]
fn test_invalid_start() {
    check_raw_str(r##"#~"abc"#"##, Err(RawStrError::InvalidStarter { bad_char: '~' }));
}

#[test]
fn test_unterminated_no_pound() {
    // https://github.com/rust-lang/rust/issues/70677
    check_raw_str(
        r#"""#,
        Err(RawStrError::NoTerminator { expected: 0, found: 0, possible_terminator_offset: None }),
    );
}

#[test]
fn test_too_many_hashes() {
    let max_count = u8::MAX;
    let hashes1 = "#".repeat(max_count as usize);
    let hashes2 = "#".repeat(max_count as usize + 1);
    let middle = "\"abc\"";
    let s1 = [&hashes1, middle, &hashes1].join("");
    let s2 = [&hashes2, middle, &hashes2].join("");

    // Valid number of hashes (255 = 2^8 - 1 = u8::MAX).
    check_raw_str(&s1, Ok(255));

    // One more hash sign (256 = 2^8) becomes too many.
    check_raw_str(&s2, Err(RawStrError::TooManyDelimiters { found: u32::from(max_count) + 1 }));
}

// https://github.com/rust-lang/rust/issues/70528
#[test]
fn test_valid_shebang() {
    let input = "#!/bin/bash";
    assert_eq!(strip_shebang(input), Some(input.len()));

    let input = "#![attribute]";
    assert_eq!(strip_shebang(input), None);

    let input = "#!    /bin/bash";
    assert_eq!(strip_shebang(input), Some(input.len()));

    let input = "#!    [attribute]";
    assert_eq!(strip_shebang(input), None);

    let input = "#! /* blah */  /bin/bash";
    assert_eq!(strip_shebang(input), Some(input.len()));

    let input = "#! /* blah */  [attribute]";
    assert_eq!(strip_shebang(input), None);

    let input = "#! // blah\n/bin/bash";
    assert_eq!(strip_shebang(input), Some(10)); // strip up to the newline

    let input = "#! // blah\n[attribute]";
    assert_eq!(strip_shebang(input), None);

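    // A block comment may span several lines, but only the first line is
    // stripped: the shebang ends at the first newline (offset 10 here).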
|  | let input = "#! /* blah\nblah\nblah */  /bin/bash"; | 
|  | assert_eq!(strip_shebang(input), Some(10)); | 
|  |  | 
|  | let input = "#! /* blah\nblah\nblah */  [attribute]"; | 
|  | assert_eq!(strip_shebang(input), None); | 
|  |  | 
|  | let input = "#!\n/bin/sh"; | 
|  | assert_eq!(strip_shebang(input), Some(2)); | 
|  |  | 
|  | let input = "#!\n[attribute]"; | 
|  | assert_eq!(strip_shebang(input), None); | 
|  |  | 
|  | // Because shebangs are interpreted by the kernel, they must be on the first line | 
|  | let input = "\n#!/bin/bash"; | 
|  | assert_eq!(strip_shebang(input), None); | 
|  |  | 
|  | let input = "\n#![attribute]"; | 
|  | assert_eq!(strip_shebang(input), None); | 
|  | } | 
|  |  | 
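/// Tokenizes `src` and renders each token's `Debug` output on its own line,
/// comparing the result against the `expect!` snapshot (snapshots can be
/// re-blessed with `UPDATE_EXPECT=1 cargo test`).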
fn check_lexing(src: &str, frontmatter_allowed: FrontmatterAllowed, expect: Expect) {
    let actual: String =
        tokenize(src, frontmatter_allowed).map(|token| format!("{:?}\n", token)).collect();
    expect.assert_eq(&actual)
}

#[test]
fn smoke_test() {
    check_lexing(
        "/* my source file */ fn main() { println!(\"zebra\"); }\n",
        FrontmatterAllowed::No,
        expect![[r#"
            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 20 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Ident, len: 2 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Ident, len: 4 }
            Token { kind: OpenParen, len: 1 }
            Token { kind: CloseParen, len: 1 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: OpenBrace, len: 1 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Ident, len: 7 }
            Token { kind: Bang, len: 1 }
            Token { kind: OpenParen, len: 1 }
            Token { kind: Literal { kind: Str { terminated: true }, suffix_start: 7 }, len: 7 }
            Token { kind: CloseParen, len: 1 }
            Token { kind: Semi, len: 1 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: CloseBrace, len: 1 }
            Token { kind: Whitespace, len: 1 }
        "#]],
    )
}

#[test]
fn comment_flavors() {
    check_lexing(
        r"
// line
//// line as well
/// outer doc line
//! inner doc line
/* block */
/**/
/*** also block */
/** outer doc block */
/*! inner doc block */
",
        FrontmatterAllowed::No,
        expect![[r#"
            Token { kind: Whitespace, len: 1 }
            Token { kind: LineComment { doc_style: None }, len: 7 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: LineComment { doc_style: None }, len: 17 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: LineComment { doc_style: Some(Outer) }, len: 18 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: LineComment { doc_style: Some(Inner) }, len: 18 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 11 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 4 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 18 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: BlockComment { doc_style: Some(Outer), terminated: true }, len: 22 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: BlockComment { doc_style: Some(Inner), terminated: true }, len: 22 }
            Token { kind: Whitespace, len: 1 }
        "#]],
    )
}

#[test]
fn nested_block_comments() {
    check_lexing(
        "/* /* */ */'a'",
        FrontmatterAllowed::No,
        expect![[r#"
            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 11 }
            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
        "#]],
    )
}

#[test]
fn characters() {
    check_lexing(
        "'a' ' ' '\\n'",
        FrontmatterAllowed::No,
        expect![[r#"
            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 4 }, len: 4 }
        "#]],
    );
}

#[test]
fn lifetime() {
    check_lexing(
        "'abc",
        FrontmatterAllowed::No,
        expect![[r#"
            Token { kind: Lifetime { starts_with_number: false }, len: 4 }
        "#]],
    );
}

#[test]
fn raw_string() {
    check_lexing(
        "r###\"\"#a\\b\x00c\"\"###",
        FrontmatterAllowed::No,
        expect![[r#"
            Token { kind: Literal { kind: RawStr { n_hashes: Some(3) }, suffix_start: 17 }, len: 17 }
        "#]],
    )
}

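// `suffix_start` is the offset at which a literal's suffix begins: `2us`
// lexes with len 3 and suffix_start 1, since the suffix `us` starts right
// after the digit.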
#[test]
fn literal_suffixes() {
    check_lexing(
        r####"
'a'
b'a'
"a"
b"a"
1234
0b101
0xABC
1.0
1.0e10
2us
r###"raw"###suffix
br###"raw"###suffix
"####,
        FrontmatterAllowed::No,
        expect![[r#"
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Byte { terminated: true }, suffix_start: 4 }, len: 4 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Str { terminated: true }, suffix_start: 3 }, len: 3 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: ByteStr { terminated: true }, suffix_start: 4 }, len: 4 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Int { base: Decimal, empty_int: false }, suffix_start: 4 }, len: 4 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Int { base: Binary, empty_int: false }, suffix_start: 5 }, len: 5 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Int { base: Hexadecimal, empty_int: false }, suffix_start: 5 }, len: 5 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Float { base: Decimal, empty_exponent: false }, suffix_start: 3 }, len: 3 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Float { base: Decimal, empty_exponent: false }, suffix_start: 6 }, len: 6 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Int { base: Decimal, empty_int: false }, suffix_start: 1 }, len: 3 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: RawStr { n_hashes: Some(3) }, suffix_start: 12 }, len: 18 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: RawByteStr { n_hashes: Some(3) }, suffix_start: 13 }, len: 19 }
            Token { kind: Whitespace, len: 1 }
        "#]],
    )
}

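// Frontmatter is the `---`-delimited metadata block at the start of a file
// (e.g. Cargo's script frontmatter); when allowed, the whole block lexes as a
// single `Frontmatter` token.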
#[test]
fn frontmatter_allowed() {
    check_lexing(
        r#"
---cargo
[dependencies]
clap = "4"
---

fn main() {}
"#,
        FrontmatterAllowed::Yes,
        expect![[r#"
            Token { kind: Whitespace, len: 1 }
            Token { kind: Frontmatter { has_invalid_preceding_whitespace: false, invalid_infostring: false }, len: 38 }
            Token { kind: Whitespace, len: 2 }
            Token { kind: Ident, len: 2 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Ident, len: 4 }
            Token { kind: OpenParen, len: 1 }
            Token { kind: CloseParen, len: 1 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: OpenBrace, len: 1 }
            Token { kind: CloseBrace, len: 1 }
            Token { kind: Whitespace, len: 1 }
        "#]],
    )
}

#[test]
fn frontmatter_disallowed() {
    check_lexing(
        r#"
---cargo
[dependencies]
clap = "4"
---

fn main() {}
"#,
        FrontmatterAllowed::No,
        expect![[r#"
            Token { kind: Whitespace, len: 1 }
            Token { kind: Minus, len: 1 }
            Token { kind: Minus, len: 1 }
            Token { kind: Minus, len: 1 }
            Token { kind: Ident, len: 5 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: OpenBracket, len: 1 }
            Token { kind: Ident, len: 12 }
            Token { kind: CloseBracket, len: 1 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Ident, len: 4 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Eq, len: 1 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Literal { kind: Str { terminated: true }, suffix_start: 3 }, len: 3 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Minus, len: 1 }
            Token { kind: Minus, len: 1 }
            Token { kind: Minus, len: 1 }
            Token { kind: Whitespace, len: 2 }
            Token { kind: Ident, len: 2 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: Ident, len: 4 }
            Token { kind: OpenParen, len: 1 }
            Token { kind: CloseParen, len: 1 }
            Token { kind: Whitespace, len: 1 }
            Token { kind: OpenBrace, len: 1 }
            Token { kind: CloseBrace, len: 1 }
            Token { kind: Whitespace, len: 1 }
        "#]],
    )
}