use core::slice;
use rustc_lexer::{self as lex, LiteralKind, Token, TokenKind};
/// A token pattern used for searching and matching by the [`Cursor`].
///
/// If a pattern is a multi-token sequence, earlier tokens will be consumed even if the
/// pattern ultimately fails to match. e.g. with the input `:*`, matching `DoubleColon`
/// will consume the first `:`, then fail to match, leaving the cursor at the `*`.
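///
/// A rough sketch of that behaviour, assuming `Cursor` and `Pat` are in scope (the
/// input string is purely illustrative):
///
/// ```ignore
/// let mut cursor = Cursor::new(":*");
/// // `DoubleColon` consumes the leading `:` before failing on `*`.
/// assert!(!cursor.match_pat(Pat::DoubleColon));
/// assert_eq!(cursor.peek_text(), "*");
/// ```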
#[derive(Clone, Copy)]
pub enum Pat<'a> {
/// Matches any number of comments and doc comments.
AnyComment,
Ident(&'a str),
CaptureIdent,
LitStr,
CaptureLitStr,
Bang,
CloseBrace,
CloseBracket,
CloseParen,
Comma,
DoubleColon,
Eq,
Lifetime,
Lt,
Gt,
OpenBrace,
OpenBracket,
OpenParen,
Pound,
Semi,
}
#[derive(Clone, Copy)]
pub struct Capture {
pub pos: u32,
pub len: u32,
}
impl Capture {
pub const EMPTY: Self = Self { pos: 0, len: 0 };
}
/// A unidirectional cursor over a token stream that is lexed on demand.
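///
/// A minimal usage sketch (the input is illustrative; note that whitespace is lexed as
/// its own token):
///
/// ```ignore
/// let mut cursor = Cursor::new("fn foo()");
/// assert_eq!(cursor.peek(), TokenKind::Ident);
/// assert_eq!(cursor.peek_text(), "fn");
/// cursor.step(); // now at the whitespace between `fn` and `foo`
/// assert!(!cursor.at_end());
/// ```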
pub struct Cursor<'txt> {
next_token: Token,
pos: u32,
inner: lex::Cursor<'txt>,
text: &'txt str,
}
impl<'txt> Cursor<'txt> {
#[must_use]
pub fn new(text: &'txt str) -> Self {
let mut inner = lex::Cursor::new(text, lex::FrontmatterAllowed::Yes);
Self {
next_token: inner.advance_token(),
pos: 0,
inner,
text,
}
}
/// Gets the text of the captured token assuming it came from this cursor.
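///
/// Illustrative sketch using a capture produced by this cursor:
///
/// ```ignore
/// let mut cursor = Cursor::new("let x");
/// let cap = cursor.find_any_ident().unwrap();
/// assert_eq!(cursor.get_text(cap), "let");
/// ```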
#[must_use]
pub fn get_text(&self, capture: Capture) -> &'txt str {
&self.text[capture.pos as usize..(capture.pos + capture.len) as usize]
}
/// Gets the text that makes up the next token in the stream, or the empty string if
/// the stream is exhausted.
#[must_use]
pub fn peek_text(&self) -> &'txt str {
&self.text[self.pos as usize..(self.pos + self.next_token.len) as usize]
}
/// Gets the length of the next token in bytes, or zero if the stream is exhausted.
#[must_use]
pub fn peek_len(&self) -> u32 {
self.next_token.len
}
/// Gets the next token in the stream, or [`TokenKind::Eof`] if the stream is
/// exhausted.
#[must_use]
pub fn peek(&self) -> TokenKind {
self.next_token.kind
}
/// Gets the offset of the next token in the source string, or the string's length if
/// the stream is exhausted.
#[must_use]
pub fn pos(&self) -> u32 {
self.pos
}
/// Gets whether the cursor has exhausted its input.
#[must_use]
pub fn at_end(&self) -> bool {
self.next_token.kind == TokenKind::Eof
}
/// Advances the cursor to the next token. If the stream is exhausted this will set
/// the next token to [`TokenKind::Eof`].
pub fn step(&mut self) {
// `next_token.len` is zero for the eof marker.
self.pos += self.next_token.len;
self.next_token = self.inner.advance_token();
}
/// Consumes tokens until the given pattern either fully matches or fails to match.
/// Returns whether the pattern was fully matched.
///
/// For each capture made by the pattern, one item will be taken from the capture
/// sequence and the result written into it.
fn match_impl(&mut self, pat: Pat<'_>, captures: &mut slice::IterMut<'_, Capture>) -> bool {
loop {
match (pat, self.next_token.kind) {
#[rustfmt::skip] // rustfmt bug: https://github.com/rust-lang/rustfmt/issues/6697
(_, TokenKind::Whitespace)
| (
Pat::AnyComment,
TokenKind::BlockComment { terminated: true, .. } | TokenKind::LineComment { .. },
) => self.step(),
(Pat::AnyComment, _) => return true,
(Pat::Bang, TokenKind::Bang)
| (Pat::CloseBrace, TokenKind::CloseBrace)
| (Pat::CloseBracket, TokenKind::CloseBracket)
| (Pat::CloseParen, TokenKind::CloseParen)
| (Pat::Comma, TokenKind::Comma)
| (Pat::Eq, TokenKind::Eq)
| (Pat::Lifetime, TokenKind::Lifetime { .. })
| (Pat::Lt, TokenKind::Lt)
| (Pat::Gt, TokenKind::Gt)
| (Pat::OpenBrace, TokenKind::OpenBrace)
| (Pat::OpenBracket, TokenKind::OpenBracket)
| (Pat::OpenParen, TokenKind::OpenParen)
| (Pat::Pound, TokenKind::Pound)
| (Pat::Semi, TokenKind::Semi)
| (
Pat::LitStr,
TokenKind::Literal {
kind: LiteralKind::Str { terminated: true } | LiteralKind::RawStr { .. },
..
},
) => {
self.step();
return true;
},
(Pat::Ident(x), TokenKind::Ident) if x == self.peek_text() => {
self.step();
return true;
},
(Pat::DoubleColon, TokenKind::Colon) => {
self.step();
if !self.at_end() && matches!(self.next_token.kind, TokenKind::Colon) {
self.step();
return true;
}
return false;
},
#[rustfmt::skip]
(
Pat::CaptureLitStr,
TokenKind::Literal {
kind:
LiteralKind::Str { terminated: true }
| LiteralKind::RawStr { n_hashes: Some(_) },
..
},
)
| (Pat::CaptureIdent, TokenKind::Ident) => {
*captures.next().unwrap() = Capture { pos: self.pos, len: self.next_token.len };
self.step();
return true;
},
_ => return false,
}
}
}
/// Consumes all tokens until the specified identifier is found and returns its
/// position. Returns `None` if the identifier could not be found.
///
/// The cursor will be positioned immediately after the identifier, or at the end of
/// the input if it is not found.
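///
/// Illustrative sketch:
///
/// ```ignore
/// let mut cursor = Cursor::new("impl Foo {}");
/// assert_eq!(cursor.find_ident("Foo"), Some(5));
/// assert_eq!(cursor.find_ident("Bar"), None);
/// assert!(cursor.at_end());
/// ```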
pub fn find_ident(&mut self, ident: &str) -> Option<u32> {
loop {
match self.next_token.kind {
TokenKind::Ident if self.peek_text() == ident => {
let pos = self.pos;
self.step();
return Some(pos);
},
TokenKind::Eof => return None,
_ => self.step(),
}
}
}
/// Consumes all tokens until the next identifier is found and captures it. Returns
/// `None` if no identifier could be found.
///
/// The cursor will be positioned immediately after the identifier, or at the end of
/// the input if no identifier is found.
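///
/// Illustrative sketch:
///
/// ```ignore
/// let mut cursor = Cursor::new("(unsafe)");
/// let cap = cursor.find_any_ident().unwrap();
/// assert_eq!(cursor.get_text(cap), "unsafe");
/// assert!(cursor.find_any_ident().is_none());
/// ```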
pub fn find_any_ident(&mut self) -> Option<Capture> {
loop {
match self.next_token.kind {
TokenKind::Ident => {
let res = Capture {
pos: self.pos,
len: self.next_token.len,
};
self.step();
return Some(res);
},
TokenKind::Eof => return None,
_ => self.step(),
}
}
}
/// Repeatedly attempts to match the pattern on subsequent tokens until a match is
/// found. Returns whether the pattern was successfully matched.
///
/// Not generally suitable for multi-token patterns or patterns that can match
/// nothing.
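///
/// Illustrative sketch:
///
/// ```ignore
/// let mut cursor = Cursor::new("foo=1");
/// assert!(cursor.find_pat(Pat::Eq));
/// assert_eq!(cursor.peek_text(), "1");
/// assert!(!cursor.find_pat(Pat::Comma)); // exhausts the input
/// ```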
#[must_use]
pub fn find_pat(&mut self, pat: Pat<'_>) -> bool {
let mut capture = [].iter_mut();
while !self.match_impl(pat, &mut capture) {
self.step();
if self.at_end() {
return false;
}
}
true
}
/// Attempts to match a sequence of patterns at the current position. Returns whether
/// all patterns were successfully matched.
///
/// Captures will be written to the given slice in the order they're matched. If a
/// capture is matched but there are no more capture slots, this will panic. If the
/// match completes without filling all the capture slots, the remaining slots are
/// left unmodified.
///
/// If the match fails, the cursor will be positioned at the first failing token.
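///
/// Illustrative sketch (the attribute text is an arbitrary example):
///
/// ```ignore
/// let mut cursor = Cursor::new(r#"#[doc = "hi"]"#);
/// let mut caps = [Capture::EMPTY];
/// assert!(cursor.match_all(
///     &[Pat::Pound, Pat::OpenBracket, Pat::CaptureIdent, Pat::Eq, Pat::LitStr, Pat::CloseBracket],
///     &mut caps,
/// ));
/// assert_eq!(cursor.get_text(caps[0]), "doc");
/// ```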
#[must_use]
pub fn match_all(&mut self, pats: &[Pat<'_>], captures: &mut [Capture]) -> bool {
let mut captures = captures.iter_mut();
pats.iter().all(|&p| self.match_impl(p, &mut captures))
}
/// Attempts to match a single pattern at the current position. Returns whether the
/// pattern was successfully matched.
///
/// If the pattern attempts to capture anything, this will panic. If the match fails,
/// the cursor will be positioned at the first failing token.
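///
/// Illustrative sketch:
///
/// ```ignore
/// let mut cursor = Cursor::new("::foo");
/// assert!(cursor.match_pat(Pat::DoubleColon));
/// assert_eq!(cursor.peek_text(), "foo");
/// ```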
#[must_use]
pub fn match_pat(&mut self, pat: Pat<'_>) -> bool {
self.match_impl(pat, &mut [].iter_mut())
}
}